@cyclonedx/cdxgen
Version:
Creates CycloneDX Software Bill of Materials (SBOM) from source or container image
1,509 lines (1,457 loc) • 78.4 kB
JavaScript
import { createHash } from "node:crypto";
import { readdirSync, readFileSync, realpathSync, statSync } from "node:fs";
import { dirname, join, relative, resolve } from "node:path";
import process from "node:process";
import { createBom } from "../cli/index.js";
import { DEFAULT_HBOM_AUDIT_CATEGORIES } from "../helpers/auditCategories.js";
import {
getCycloneDxFormat,
getNonCycloneDxErrorMessage,
isCycloneDxBom,
} from "../helpers/bomUtils.js";
import { thoughtLog } from "../helpers/logger.js";
import {
hasRegistryProvenanceEvidenceProperties,
hasTrustedPublishingProperties,
} from "../helpers/provenanceUtils.js";
import {
cleanupSourceDir,
findGitRefForPurlVersion,
hardenedGitCommand,
resolveGitUrlFromPurl,
resolvePurlSourceDirectory,
sanitizeRemoteUrlForLogs,
} from "../helpers/source.js";
import {
dirNameStr,
getTmpDir,
isDryRun,
recordActivity,
safeExistsSync,
safeMkdirSync,
safeMkdtempSync,
safeRmSync,
safeWriteSync,
} from "../helpers/utils.js";
import {
auditBom,
isHbomLikeBom,
isObomLikeBom,
} from "../stages/postgen/auditBom.js";
import { postProcess } from "../stages/postgen/postgen.js";
import { formatTargetLabel } from "./progress.js";
import { renderAuditReport } from "./reporters.js";
import {
SEVERITY_ORDER,
scoreTargetRisk,
severityMeetsThreshold,
} from "./scoring.js";
import {
collectAuditTargets,
enrichInputBomsWithRegistryMetadata,
normalizePackageName,
} from "./targets.js";
// Audit category identifiers exported as this module's default selection.
// NOTE(review): the consumer of this list is outside the visible section — confirm where it is applied.
export const DEFAULT_AUDIT_CATEGORIES = [
"ai-agent",
"ci-permission",
"dependency-source",
"package-integrity",
];
// Tool name reported in the `tool` section of the direct-audit report.
const DIRECT_AUDIT_TOOL_NAME = "cdx-audit";
// Per-target cache layout: <workspace>/<target slug>/.cdx-audit/{source-bom.json,source-bom.meta.json}.
const AUDIT_CACHE_DIRNAME = ".cdx-audit";
const AUDIT_CACHE_BOM_FILE = "source-bom.json";
const AUDIT_CACHE_META_FILE = "source-bom.meta.json";
// NOTE(review): presumably back-off delays (ms) between clone retries — used outside the visible section.
const CLONE_RETRY_DELAYS_MS = [750, 1500];
// Target-count thresholds that select the duration estimate in the predictive-audit preflight message.
const LARGE_PREDICTIVE_AUDIT_THRESHOLD = 50;
const VERY_LARGE_PREDICTIVE_AUDIT_THRESHOLD = 100;
// NOTE(review): the PYTHON_* values below are not referenced in the visible section; they look like
// inputs to a Python source heuristic scan (file names, limits, skip list) — confirm against the consumer.
const PYTHON_METADATA_FILES = ["pyproject.toml", "setup.cfg", "setup.py"];
const PYTHON_HEURISTIC_FILENAMES = new Set(["setup.py", "__init__.py"]);
const PYTHON_HEURISTIC_FILE_LIMIT = 32;
// 256 KiB.
const PYTHON_HEURISTIC_MAX_FILE_BYTES = 256 * 1024;
const PYTHON_SKIP_DIRS = new Set([
".git",
".hg",
".tox",
".venv",
"__pycache__",
"build",
"dist",
"node_modules",
"site-packages",
"venv",
]);
// Matches `exec(` / `eval(` / `compile(` calls and references to subprocess.Popen/run/call/check_output
// or os.system/os.popen (case-insensitive).
const PYTHON_EXECUTION_PATTERN =
/\b(?:exec|eval|compile)\s*\(|\b(?:subprocess\.(?:Popen|run|call|check_output)|os\.(?:system|popen))\b/i;
// Matches requests.get/post/put/patch, urllib.urlopen / urllib.request.urlopen, http.client and
// socket.socket references (case-insensitive).
const PYTHON_NETWORK_PATTERN =
/\b(?:requests\.(?:get|post|put|patch)|urllib(?:\.request)?\.urlopen|http\.client|socket\.socket)\b/i;
// Matches common decode/unpack helpers: base64.b64decode/urlsafe_b64decode, binascii.a2b_base64,
// marshal.loads, zlib.decompress, codecs.decode(... base64 ...) and bytes.fromhex (case-insensitive).
const PYTHON_OBFUSCATION_PATTERN =
/\b(?:base64\.(?:b64decode|urlsafe_b64decode)|binascii\.a2b_base64|marshal\.loads|zlib\.decompress|codecs\.decode\s*\([^)]*base64|bytes\.fromhex)\b/i;
// Matches a `cmdclass =` assignment (case-insensitive).
const PYTHON_SETUP_CMDCLASS_PATTERN = /\bcmdclass\s*=/i;
/**
 * Read a JSON file from disk and verify that it is a CycloneDX BOM.
 *
 * @param {string} bomPath BOM file path; resolved to an absolute path before reading
 * @returns {object} parsed CycloneDX BOM
 * @throws {Error} when the file cannot be read or parsed (the underlying error
 * is attached as `cause`), or when the parsed JSON is not a CycloneDX BOM
 */
export function loadBomFile(bomPath) {
  const resolvedPath = resolve(bomPath);
  let bomJson;
  try {
    bomJson = JSON.parse(readFileSync(resolvedPath, "utf8"));
  } catch (error) {
    // The try block covers both the read and the parse, so keep the original
    // error reachable (code, stack) instead of flattening it into a message.
    throw new Error(`Failed to parse ${resolvedPath}: ${error.message}`, {
      cause: error,
    });
  }
  if (!isCycloneDxBom(bomJson)) {
    throw new Error(getNonCycloneDxErrorMessage(bomJson, "cdx-audit"));
  }
  return bomJson;
}
/**
 * Collect every `.json` file found below a directory, descending into
 * subdirectories.
 *
 * @param {string} bomDir directory to scan
 * @returns {string[]} absolute file paths in default string sort order
 */
export function listBomFiles(bomDir) {
  const jsonPaths = [];
  const pendingDirs = [resolve(bomDir)];
  // Walking the growing list with a cursor visits directories in the same
  // first-in, first-out order as a queue.
  for (let cursor = 0; cursor < pendingDirs.length; cursor += 1) {
    const dirPath = pendingDirs[cursor];
    for (const dirent of readdirSync(dirPath, { withFileTypes: true })) {
      const fullPath = join(dirPath, dirent.name);
      if (dirent.isDirectory()) {
        pendingDirs.push(fullPath);
      } else if (dirent.isFile() && dirent.name.endsWith(".json")) {
        jsonPaths.push(fullPath);
      }
    }
  }
  jsonPaths.sort();
  return jsonPaths;
}
/**
 * Gather the BOMs named by the CLI options: the single `bom` file first, then
 * every loadable file discovered under `bomDir`.
 *
 * A failure on the explicit `bom` file propagates to the caller; a failure on
 * a file found under `bomDir` is reported with `console.warn` and skipped.
 *
 * @param {object} options CLI options (`bom` and/or `bomDir`)
 * @returns {{ source: string, bomJson: object }[]} loaded input BOMs
 */
export function loadInputBoms(options) {
  const loaded = [];
  const { bom, bomDir } = options;
  if (bom) {
    const bomJson = loadBomFile(bom);
    loaded.push({ bomJson, source: resolve(bom) });
  }
  if (bomDir) {
    for (const candidate of listBomFiles(bomDir)) {
      let bomJson;
      try {
        bomJson = loadBomFile(candidate);
      } catch (error) {
        console.warn(
          `Skipping non-CycloneDX JSON file '${candidate}': ${error.message}`,
        );
        continue;
      }
      loaded.push({ bomJson, source: candidate });
    }
  }
  return loaded;
}
/**
 * Count findings per severity and determine the highest severity present.
 *
 * Findings without a severity are counted as "low". Severities outside the
 * four known buckets still contribute to `findingsCount` but not to any bucket.
 *
 * @param {object[]} [findings] audit findings
 * @returns {{ findingsBySeverity: object, findingsCount: number, maxSeverity: string }} summary
 */
function summarizeDirectAuditFindings(findings = []) {
  const findingsBySeverity = {
    critical: 0,
    high: 0,
    low: 0,
    medium: 0,
  };
  let maxSeverity = "none";
  for (const finding of findings) {
    const severity = finding?.severity || "low";
    // Own-property check: a severity such as "toString" or "constructor" must
    // not resolve through Object.prototype and corrupt the counters.
    if (Object.hasOwn(findingsBySeverity, severity)) {
      findingsBySeverity[severity] += 1;
    }
    if (
      (SEVERITY_ORDER[severity] ?? -1) > (SEVERITY_ORDER[maxSeverity] ?? -1)
    ) {
      maxSeverity = severity;
    }
  }
  return {
    findingsBySeverity,
    findingsCount: findings.length,
    maxSeverity,
  };
}
/**
 * Translate CLI options into the option object handed to `auditBom`.
 *
 * Explicit categories win. Otherwise the category default depends on the BOM:
 * HBOM-like BOMs get `DEFAULT_HBOM_AUDIT_CATEGORIES`, OBOM-like BOMs get
 * "obom-runtime", and anything else leaves the categories undefined.
 *
 * @param {object} bomJson BOM being audited
 * @param {object} [options] CLI options (`categories`, `minSeverity`, `rulesDir`)
 * @returns {{ bomAuditCategories: (string|undefined), bomAuditMinSeverity: string, bomAuditRulesDir: (string|undefined) }} audit options
 */
function buildDirectAuditOptions(bomJson, options = {}) {
  const { categories, minSeverity, rulesDir } = options;
  let bomAuditCategories = categories?.length
    ? categories.join(",")
    : undefined;
  if (!bomAuditCategories) {
    if (isHbomLikeBom(bomJson)) {
      bomAuditCategories = DEFAULT_HBOM_AUDIT_CATEGORIES;
    } else if (isObomLikeBom(bomJson)) {
      bomAuditCategories = "obom-runtime";
    } else {
      bomAuditCategories = undefined;
    }
  }
  return {
    bomAuditCategories,
    bomAuditMinSeverity: minSeverity || "low",
    bomAuditRulesDir: rulesDir,
  };
}
/**
 * Audit each input BOM directly and assemble a combined report.
 *
 * BOMs are audited one after another, in input order. The report carries one
 * result entry per BOM plus totals aggregated across all of them.
 *
 * @param {{ source: string, bomJson: object }[]} inputBoms loaded input BOMs
 * @param {object} [options] CLI options forwarded to the per-BOM audit options
 * @returns {Promise<object>} direct audit report
 * @throws {Error} when `inputBoms` is empty
 */
export async function runDirectBomAuditFromBoms(inputBoms, options = {}) {
  if (!inputBoms.length) {
    throw new Error("No CycloneDX BOM inputs were found.");
  }
  // Severities missing from the ranking table sort below every known one.
  const severityRank = (severity) => SEVERITY_ORDER[severity] ?? -1;
  const results = [];
  for (const { bomJson, source } of inputBoms) {
    const auditOptions = buildDirectAuditOptions(bomJson, options);
    const findings = await auditBom(bomJson, auditOptions);
    results.push({
      auditOptions,
      bomFormat: getCycloneDxFormat(bomJson),
      findings,
      serialNumber: bomJson?.serialNumber,
      source,
      specVersion: bomJson?.specVersion,
      status: "audited",
      summary: summarizeDirectAuditFindings(findings),
    });
  }
  const summary = {
    findingsBySeverity: {
      critical: 0,
      high: 0,
      low: 0,
      medium: 0,
    },
    inputBomCount: inputBoms.length,
    maxSeverity: "none",
    totalFindings: 0,
    bomsWithFindings: 0,
  };
  for (const { summary: bomSummary } of results) {
    summary.totalFindings += bomSummary.findingsCount;
    if (bomSummary.findingsCount > 0) {
      summary.bomsWithFindings += 1;
    }
    Object.entries(bomSummary.findingsBySeverity).forEach(
      ([severity, count]) => {
        summary.findingsBySeverity[severity] += count;
      },
    );
    if (severityRank(bomSummary.maxSeverity) > severityRank(summary.maxSeverity)) {
      summary.maxSeverity = bomSummary.maxSeverity;
    }
  }
  return {
    auditMode: "direct",
    generatedAt: new Date().toISOString(),
    inputs: inputBoms.map(({ source }) => source),
    results,
    summary,
    tool: {
      name: DIRECT_AUDIT_TOOL_NAME,
      version: readPackageVersion(),
    },
  };
}
/**
 * Look up the `version` field of the `package.json` located in `dirNameStr`.
 *
 * @returns {string} package version
 */
function readPackageVersion() {
  const manifestPath = join(dirNameStr, "package.json");
  const { version } = JSON.parse(readFileSync(manifestPath, "utf8"));
  return version;
}
/**
 * Derive a stable, directory-safe name for a target, used for report and
 * workspace paths.
 *
 * The slug is `<type>-<name>-<version>-<digest>`, where the digest is the first
 * 12 hex characters of the SHA-256 of the purl, so targets whose names
 * sanitize to the same text still get distinct slugs.
 *
 * @param {object} target audit target (`type`, `namespace`, `name`, `version`, `purl`)
 * @returns {string} slug string
 */
function targetSlug(target) {
  const { namespace, name, version, purl, type } = target;
  const qualifiedName = namespace ? `${namespace}-${name}` : name;
  // Anything outside [a-z0-9-] becomes a dash; dash runs collapse and edge
  // dashes are trimmed.
  const safeName = normalizePackageName(qualifiedName)
    .replace(/[^a-z0-9-]/g, "-")
    .replace(/-+/g, "-")
    .replace(/^-|-$/g, "");
  const safeVersion = normalizePackageName(version || "latest") || "latest";
  const purlDigest = createHash("sha256")
    .update(purl)
    .digest("hex")
    .substring(0, 12);
  return `${type}-${safeName || "package"}-${safeVersion}-${purlDigest}`;
}
/**
 * Write text to a file, creating the containing directory first when it is
 * missing.
 *
 * @param {string} filePath file path to create
 * @param {string} content file content
 * @returns {void}
 */
function writeTextFile(filePath, content) {
  const containingDir = dirname(filePath);
  const dirIsMissing = !safeExistsSync(containingDir);
  if (dirIsMissing) {
    safeMkdirSync(containingDir, { recursive: true });
  }
  safeWriteSync(filePath, content);
}
/**
 * Serialize a payload as 2-space-indented JSON with a trailing newline and
 * write it through `writeTextFile`.
 *
 * @param {string} filePath file path to create
 * @param {object} payload JSON payload
 * @returns {void}
 */
function writeJsonFile(filePath, payload) {
  const serialized = JSON.stringify(payload, null, 2);
  writeTextFile(filePath, `${serialized}\n`);
}
/**
 * Return a promise that resolves (with no value) after a timer delay.
 *
 * @param {number} ms delay in milliseconds
 * @returns {Promise<void>} promise settled by `setTimeout`
 */
function sleep(ms) {
  return new Promise((wake) => setTimeout(wake, ms));
}
/**
 * Check whether a path is, or lives below, one of the accepted parent
 * directories.
 *
 * The accepted parents are `parentDir` and, on non-Windows platforms, also
 * `/tmp` and `/private/tmp`. Every path is canonicalized with `realpathSync`
 * first, falling back to `resolve` when the path cannot be resolved on disk
 * (for example because it does not exist yet).
 *
 * Containment is decided by walking up the child's ancestors with `dirname`
 * rather than by comparing string prefixes with a hard-coded "/", so the check
 * also works with Windows separators and when the parent is the filesystem root.
 *
 * @param {string} parentDir primary parent directory
 * @param {string} childPath path to test
 * @returns {boolean} true when the child equals or is nested under an accepted parent
 */
function isPathWithin(parentDir, childPath) {
  const canonicalize = (candidatePath) => {
    try {
      return realpathSync.native
        ? realpathSync.native(candidatePath)
        : realpathSync(candidatePath);
    } catch {
      return resolve(candidatePath);
    }
  };
  const hasAncestor = (descendantPath, ancestorPath) => {
    let currentPath = descendantPath;
    for (;;) {
      if (currentPath === ancestorPath) {
        return true;
      }
      const nextPath = dirname(currentPath);
      // dirname() of a filesystem root returns the root itself: nothing left to climb.
      if (nextPath === currentPath) {
        return false;
      }
      currentPath = nextPath;
    }
  };
  const normalizedChild = canonicalize(childPath);
  const candidateParents = [parentDir];
  if (process.platform !== "win32") {
    candidateParents.push("/tmp", "/private/tmp");
  }
  return candidateParents.some((candidateParent) =>
    hasAncestor(normalizedChild, canonicalize(candidateParent)),
  );
}
/**
 * Tell whether a workspace directory sits inside the temp directory reported
 * by `getTmpDir()`, as judged by `isPathWithin`.
 *
 * @param {string | undefined} workspaceDir workspace directory
 * @returns {boolean} false for a missing or empty value, otherwise the `isPathWithin` result
 */
function isTemporaryWorkspaceDir(workspaceDir) {
  if (!workspaceDir) {
    return false;
  }
  return isPathWithin(getTmpDir(), workspaceDir);
}
/**
 * Resolve the optional workspace directory and create it when absent.
 *
 * `cleanupOnFinish` is only set for a directory that this call created and
 * that `isTemporaryWorkspaceDir` classifies as temporary; a directory that
 * already existed is never marked for cleanup.
 *
 * @param {object} [options] CLI options (`workspaceDir`)
 * @returns {{ cleanupOnFinish: boolean, workspaceDir: (string|undefined) }} workspace context
 */
function prepareWorkspaceContext(options = {}) {
  const requestedDir = options.workspaceDir;
  if (!requestedDir) {
    return { cleanupOnFinish: false, workspaceDir: undefined };
  }
  const workspaceDir = resolve(requestedDir);
  if (safeExistsSync(workspaceDir)) {
    return { cleanupOnFinish: false, workspaceDir };
  }
  safeMkdirSync(workspaceDir, { recursive: true });
  return {
    cleanupOnFinish: isTemporaryWorkspaceDir(workspaceDir),
    workspaceDir,
  };
}
/**
 * Compute the per-target directory inside a workspace: the absolute workspace
 * path joined with the target's slug.
 *
 * @param {string} workspaceDir workspace directory
 * @param {object} target audit target
 * @returns {string} absolute per-target directory path
 */
function getWorkspaceTargetDir(workspaceDir, target) {
  const absoluteWorkspace = resolve(workspaceDir);
  const slug = targetSlug(target);
  return join(absoluteWorkspace, slug);
}
/**
 * Compute the cache locations for a target inside a workspace.
 *
 * @param {string} workspaceDir workspace directory
 * @param {object} target audit target
 * @returns {{ cacheDir: string, metadataFile: string, sourceBomFile: string, targetDir: string }} cache paths
 */
function getWorkspaceCachePaths(workspaceDir, target) {
  const targetDir = getWorkspaceTargetDir(workspaceDir, target);
  const cacheDir = join(targetDir, AUDIT_CACHE_DIRNAME);
  const metadataFile = join(cacheDir, AUDIT_CACHE_META_FILE);
  const sourceBomFile = join(cacheDir, AUDIT_CACHE_BOM_FILE);
  return { cacheDir, metadataFile, sourceBomFile, targetDir };
}
/**
 * Load a previously cached child BOM for a target, together with the cached
 * resolution metadata when a metadata file exists.
 *
 * This is a best-effort lookup: a missing workspace, a missing cache file, or
 * any read/parse/validation failure yields `undefined` instead of throwing.
 *
 * @param {string | undefined} workspaceDir workspace directory
 * @param {object} target audit target
 * @returns {object | undefined} cached BOM entry, or undefined when unavailable
 */
function loadCachedChildBom(workspaceDir, target) {
  if (!workspaceDir) {
    return undefined;
  }
  const { cacheDir, metadataFile, sourceBomFile, targetDir } =
    getWorkspaceCachePaths(workspaceDir, target);
  if (!safeExistsSync(sourceBomFile)) {
    return undefined;
  }
  try {
    const bomJson = loadBomFile(sourceBomFile);
    const metadata = safeExistsSync(metadataFile)
      ? JSON.parse(readFileSync(metadataFile, "utf8"))
      : {};
    // The scan directory is stored relative to the target directory.
    let scanDir = targetDir;
    if (metadata.scanDirRelative) {
      scanDir = resolve(targetDir, metadata.scanDirRelative);
    }
    return {
      bomJson,
      cacheDir,
      repoUrl: metadata.repoUrl,
      resolution: metadata.resolution,
      scanDir,
      sourceDirectoryConfidence: metadata.sourceDirectoryConfidence || "medium",
      // Only an explicit `false` marks the cached version as unmatched.
      versionMatched: metadata.versionMatched !== false,
    };
  } catch {
    return undefined;
  }
}
/**
 * Store a generated child BOM and its resolution metadata in the workspace
 * cache of a target.
 *
 * Nothing is written when there is no workspace or the payload has no BOM.
 * The scan directory is stored relative to the target directory ("." when it
 * is the target directory itself or was not provided).
 *
 * @param {string | undefined} workspaceDir workspace directory
 * @param {object} target audit target
 * @param {object} payload cache payload (`bomJson`, `repoUrl`, `resolution`, `scanDir`, `sourceDirectoryConfidence`, `versionMatched`)
 * @returns {void}
 */
function writeCachedChildBom(workspaceDir, target, payload) {
  if (!(workspaceDir && payload?.bomJson)) {
    return;
  }
  const { cacheDir, metadataFile, sourceBomFile, targetDir } =
    getWorkspaceCachePaths(workspaceDir, target);
  safeMkdirSync(cacheDir, { recursive: true });
  writeJsonFile(sourceBomFile, payload.bomJson);
  const generatedAt = new Date().toISOString();
  let scanDirRelative = ".";
  if (payload.scanDir) {
    scanDirRelative = relative(targetDir, resolve(payload.scanDir)) || ".";
  }
  writeJsonFile(metadataFile, {
    generatedAt,
    repoUrl: payload.repoUrl,
    resolution: payload.resolution,
    scanDirRelative,
    sourceDirectoryConfidence: payload.sourceDirectoryConfidence,
    versionMatched: payload.versionMatched,
  });
}
/**
 * Write the artifacts of one audit result below `options.reportsDir` and
 * record their locations on the result.
 *
 * The result object is mutated in place (`reportDir`, `findingsFile`,
 * `summaryFile`, and `sourceBomFile` when a source BOM is supplied). Without a
 * reports directory the result is returned untouched.
 *
 * @param {object} result audit result for a single target
 * @param {object} options CLI options (`reportsDir`)
 * @param {object} [sourceBomJson] source BOM to store next to the findings
 * @returns {object} the same result object
 */
function persistAuditArtifacts(result, options, sourceBomJson) {
  const { reportsDir } = options;
  if (!reportsDir) {
    return result;
  }
  const resultDir = join(resolve(reportsDir), targetSlug(result.target));
  safeMkdirSync(resultDir, { recursive: true });
  Object.assign(result, {
    reportDir: resultDir,
    findingsFile: join(resultDir, "findings.json"),
    summaryFile: join(resultDir, "summary.json"),
  });
  if (sourceBomJson) {
    result.sourceBomFile = join(resultDir, "source-bom.json");
    writeJsonFile(result.sourceBomFile, sourceBomJson);
  }
  writeJsonFile(result.findingsFile, result.findings || []);
  const summaryPayload = {
    assessment: result.assessment,
    cacheHit: result.cacheHit || false,
    error: result.error,
    errorType: result.errorType,
    findingsCount: result.findings?.length || 0,
    repoUrl: result.repoUrl,
    sourceDirectoryConfidence: result.sourceDirectoryConfidence,
    status: result.status,
    target: result.target,
  };
  writeJsonFile(result.summaryFile, summaryPayload);
  return result;
}
/**
 * Forward a progress event to `options.onProgress` when it is a function;
 * otherwise do nothing.
 *
 * @param {object} options CLI options
 * @param {object} event progress event payload
 * @returns {void}
 */
function emitProgress(options, event) {
  const listener = options?.onProgress;
  if (typeof listener !== "function") {
    return;
  }
  // Invoke through `options` so the callback keeps its receiver.
  options.onProgress(event);
}
/**
 * Pick the duration hint that matches the number of selected targets.
 *
 * @param {number} selectedTargets number of targets selected for the audit
 * @returns {string | undefined} duration hint, or undefined below the large-audit threshold
 */
function buildPredictiveAuditEstimate(selectedTargets) {
  // Ordered from the highest threshold down; the first tier reached wins.
  const tiers = [
    [
      VERY_LARGE_PREDICTIVE_AUDIT_THRESHOLD,
      "This may take 10+ minutes depending on repository lookups and child SBOM generation.",
    ],
    [
      LARGE_PREDICTIVE_AUDIT_THRESHOLD,
      "This may take several minutes depending on repository lookups and child SBOM generation.",
    ],
  ];
  const matchedTier = tiers.find(([minimum]) => selectedTargets >= minimum);
  return matchedTier?.[1];
}
/**
 * Compose the message shown before a predictive audit starts.
 *
 * Small runs (no duration estimate and fewer available targets than the
 * large-audit threshold) only report exclusions, or return undefined when
 * there is nothing to report. Larger runs describe the selection, followed by
 * the estimate (or a generic hint) and any exclusion notes.
 *
 * @param {object} extractedTargets target extraction result (`targets`, `stats`)
 * @param {object} options CLI options (`scope`, `allowlistFile`, `trustedSelectionHelp`)
 * @returns {string | undefined} preflight message
 */
function buildPredictiveAuditPreflightMessage(extractedTargets, options) {
  const stats = extractedTargets?.stats;
  const statCount = (key) => stats?.[key] || 0;
  const selectedTargets = extractedTargets?.targets?.length || 0;
  const allowlistedTargetsExcluded = statCount("allowlistedTargetsExcluded");
  const availableTargets = statCount("availableTargets");
  const requiredTargets = statCount("requiredTargets");
  const trustedTargetsExcluded = statCount("trustedTargetsExcluded");
  const truncatedTargets = statCount("truncatedTargets");
  const estimate = buildPredictiveAuditEstimate(selectedTargets);

  let trustedExclusionMessage = "";
  if (trustedTargetsExcluded) {
    const trustedHint = options?.trustedSelectionHelp
      ? ` ${options.trustedSelectionHelp}`
      : "";
    trustedExclusionMessage = ` Skipping ${trustedTargetsExcluded} trusted-publishing-backed package(s) by default.${trustedHint}`;
  }
  let allowlistExclusionMessage = "";
  if (allowlistedTargetsExcluded) {
    const customAllowlistSuffix = options?.allowlistFile
      ? " and your custom allowlist"
      : "";
    allowlistExclusionMessage = ` Skipping ${allowlistedTargetsExcluded} allowlisted package(s) using the built-in well-known purl prefix filter${customAllowlistSuffix}.`;
  }
  const exclusionNotes = `${trustedExclusionMessage}${allowlistExclusionMessage}`;

  const isSmallRun =
    !estimate && availableTargets < LARGE_PREDICTIVE_AUDIT_THRESHOLD;
  if (isSmallRun) {
    return exclusionNotes.trim() || undefined;
  }
  if (options?.scope === "required") {
    const hint =
      estimate ||
      "Large required-only scans may still take a while depending on repository lookups and child SBOM generation.";
    return `Predictive audit will scan ${selectedTargets} required package(s). ${hint}${exclusionNotes}`;
  }
  if (truncatedTargets > 0) {
    const additionalTargets = Math.max(0, selectedTargets - requiredTargets);
    const additionalNote = additionalTargets
      ? ` + ${additionalTargets} additional`
      : "";
    const hint =
      estimate || "This run was trimmed to keep audit time reasonable.";
    return `Predictive audit selected ${selectedTargets} of ${availableTargets} package(s) (${requiredTargets} required${additionalNote}) using required-first prioritization. ${hint}${exclusionNotes}`;
  }
  const hint =
    estimate ||
    "Large predictive audits may still take a while depending on repository lookups and child SBOM generation.";
  return `Predictive audit will scan ${selectedTargets} package(s). ${hint}${exclusionNotes}`;
}
/**
 * Look up the value of the first entry in `target.properties` whose name
 * matches.
 *
 * @param {object} target audit target
 * @param {string} propertyName property name
 * @returns {string | undefined} property value, or undefined when there is no such entry
 */
function getTargetProperty(target, propertyName) {
  const matchingEntry = target?.properties?.find(
    ({ name }) => name === propertyName,
  );
  return matchingEntry?.value;
}
/**
 * Read a target property and convert it to a finite number.
 *
 * @param {object} target audit target
 * @param {string} propertyName property name
 * @returns {number | undefined} numeric value, or undefined when the property is missing, empty, or not a finite number
 */
function getTargetNumberProperty(target, propertyName) {
  const rawValue = getTargetProperty(target, propertyName);
  if (rawValue) {
    const parsed = Number(rawValue);
    if (Number.isFinite(parsed)) {
      return parsed;
    }
  }
  return undefined;
}
/**
 * Read a target property and parse it as a date with `Date.parse`.
 *
 * @param {object} target audit target
 * @param {string} propertyName property name
 * @returns {number | undefined} epoch milliseconds, or undefined when the property is missing, empty, or unparseable
 */
function getTargetTimestampProperty(target, propertyName) {
  const rawValue = getTargetProperty(target, propertyName);
  if (rawValue) {
    const epochMs = Date.parse(rawValue);
    if (!Number.isNaN(epochMs)) {
      return epochMs;
    }
  }
  return undefined;
}
/**
 * Read a comma-separated target property as a list of unique, trimmed,
 * non-empty entries in first-seen order.
 *
 * @param {object} target audit target
 * @param {string} propertyName property name
 * @returns {string[]} list entries; empty when the property is missing or empty
 */
function getTargetListProperty(target, propertyName) {
  const rawValue = getTargetProperty(target, propertyName);
  if (!rawValue) {
    return [];
  }
  const uniqueEntries = new Set();
  for (const piece of rawValue.split(",")) {
    const trimmed = piece.trim();
    if (trimmed) {
      uniqueEntries.add(trimmed);
    }
  }
  return Array.from(uniqueEntries);
}
/**
 * Decide whether a package counts as established: created at least 30 days
 * ago and with at least 3 versions, based on the `packageCreatedTime` and
 * `versionCount` properties under the given prefix.
 *
 * @param {object} target audit target
 * @param {string} propertyPrefix property namespace, e.g. "cdx:npm"
 * @returns {boolean} true when both signals are present and meet the thresholds
 */
function isEstablishedPackage(target, propertyPrefix) {
  const THIRTY_DAYS_MS = 1000 * 60 * 60 * 24 * 30;
  const createdAt = getTargetTimestampProperty(
    target,
    `${propertyPrefix}:packageCreatedTime`,
  );
  const releaseCount = getTargetNumberProperty(
    target,
    `${propertyPrefix}:versionCount`,
  );
  if (!createdAt || !releaseCount) {
    return false;
  }
  const isOldEnough = Date.now() - createdAt >= THIRTY_DAYS_MS;
  return isOldEnough && releaseCount >= 3;
}
/**
 * Decide whether the release was published within the last 72 hours, based on
 * the `publishTime` property under the given prefix. Publish times in the
 * future do not count as recent.
 *
 * @param {object} target audit target
 * @param {string} propertyPrefix property namespace, e.g. "cdx:npm"
 * @returns {boolean} true when the publish time is known and 0–72 hours old
 */
function isRecentRelease(target, propertyPrefix) {
  const SEVENTY_TWO_HOURS_MS = 1000 * 60 * 60 * 72;
  const publishedAt = getTargetTimestampProperty(
    target,
    `${propertyPrefix}:publishTime`,
  );
  if (!publishedAt) {
    return false;
  }
  const ageMs = Date.now() - publishedAt;
  if (ageMs < 0) {
    return false;
  }
  return ageMs <= SEVENTY_TWO_HOURS_MS;
}
/**
 * Check whether the `publisherDrift` property under the given prefix is the
 * string "true".
 *
 * @param {object} target audit target
 * @param {string} propertyPrefix property namespace, e.g. "cdx:npm"
 * @returns {boolean} true only for the exact string "true"
 */
function hasPublisherDrift(target, propertyPrefix) {
  const driftFlag = getTargetProperty(target, `${propertyPrefix}:publisherDrift`);
  return driftFlag === "true";
}
/**
 * Check whether either the `maintainerSetDrift` or the `uploaderSetDrift`
 * property under the given prefix is the string "true".
 *
 * @param {object} target audit target
 * @param {string} propertyPrefix property namespace, e.g. "cdx:npm"
 * @returns {boolean} true when at least one of the two flags is "true"
 */
function hasMaintainerSetDrift(target, propertyPrefix) {
  const driftFlagSuffixes = ["maintainerSetDrift", "uploaderSetDrift"];
  return driftFlagSuffixes.some(
    (suffix) =>
      getTargetProperty(target, `${propertyPrefix}:${suffix}`) === "true",
  );
}
/**
 * Detect partial identity-set drift: the current and prior identity sets
 * share at least one member but are not identical.
 *
 * npm targets use the `maintainerSet*` properties; every other prefix uses
 * the `uploaderSet*` properties. An explicit `...SetPartialDrift` flag of
 * "true" short-circuits the set comparison.
 *
 * @param {object} target audit target
 * @param {string} propertyPrefix property namespace, e.g. "cdx:npm"
 * @returns {boolean} true when partial drift is flagged or derived from the two sets
 */
function hasPartialIdentitySetDrift(target, propertyPrefix) {
  const isNpm = propertyPrefix === "cdx:npm";
  const role = isNpm ? "maintainer" : "uploader";
  const capitalizedRole = isNpm ? "Maintainer" : "Uploader";
  const explicitFlag = getTargetProperty(
    target,
    `${propertyPrefix}:${role}SetPartialDrift`,
  );
  if (explicitFlag === "true") {
    return true;
  }
  const currentSet = getTargetListProperty(
    target,
    `${propertyPrefix}:${role}Set`,
  );
  const priorSet = getTargetListProperty(
    target,
    `${propertyPrefix}:prior${capitalizedRole}Set`,
  );
  if (currentSet.length === 0 || priorSet.length === 0) {
    return false;
  }
  const priorMembers = new Set(priorSet);
  let sharedCount = 0;
  for (const identity of currentSet) {
    if (priorMembers.has(identity)) {
      sharedCount += 1;
    }
  }
  // No shared identity at all is full drift, which this check does not report.
  if (sharedCount === 0) {
    return false;
  }
  const combinedCount = new Set([...currentSet, ...priorSet]).size;
  const someIdentityChanged =
    sharedCount < currentSet.length || sharedCount < priorSet.length;
  return sharedCount < combinedCount && someIdentityChanged;
}
/**
 * Detect an unusually long gap before the current release: the gap must be at
 * least 90 days and at least 8 times the baseline gap, with a sample of 3 or
 * more prior gaps.
 *
 * @param {object} target audit target
 * @param {string} propertyPrefix property namespace, e.g. "cdx:npm"
 * @returns {boolean} true when all three signals are present and the gap exceeds the limit
 */
function hasDormantReleaseGapAnomaly(target, propertyPrefix) {
  const readNumber = (suffix) =>
    getTargetNumberProperty(target, `${propertyPrefix}:${suffix}`);
  const gapDays = readNumber("releaseGapDays");
  const baselineDays = readNumber("releaseGapBaselineDays");
  const sampleCount = readNumber("releaseGapSampleSize");
  // A missing or zero signal disqualifies the check.
  const hasAllSignals = Boolean(gapDays && baselineDays && sampleCount);
  if (!hasAllSignals || sampleCount < 3) {
    return false;
  }
  const dormantLimitDays = Math.max(90, baselineDays * 8);
  return gapDays >= dormantLimitDays;
}
/**
 * Detect a release that arrived much faster than the package's usual cadence.
 *
 * An explicit `compressedCadence` flag of "true" wins. Otherwise the signal
 * is derived: with a sample of 3 or more gaps and a baseline of at least 21
 * days, the current gap must be positive, at most 14 days, and at most 33% of
 * the baseline.
 *
 * @param {object} target audit target
 * @param {string} propertyPrefix property namespace, e.g. "cdx:npm"
 * @returns {boolean} true when compressed cadence is flagged or derived
 */
function hasCompressedCadence(target, propertyPrefix) {
  const explicitFlag = getTargetProperty(
    target,
    `${propertyPrefix}:compressedCadence`,
  );
  if (explicitFlag === "true") {
    return true;
  }
  const readNumber = (suffix) =>
    getTargetNumberProperty(target, `${propertyPrefix}:${suffix}`);
  const gapDays = readNumber("releaseGapDays");
  const baselineDays = readNumber("releaseGapBaselineDays");
  const sampleCount = readNumber("releaseGapSampleSize");
  const anySignalMissing = [gapDays, baselineDays, sampleCount].some(
    (reading) => reading === undefined,
  );
  if (anySignalMissing) {
    return false;
  }
  if (
    sampleCount < 3 ||
    gapDays <= 0 ||
    baselineDays <= 0 ||
    baselineDays < 21
  ) {
    return false;
  }
  const gapToBaselineRatio = gapDays / baselineDays;
  return gapDays <= 14 && gapToBaselineRatio <= 0.33;
}
/**
* Build low-noise provenance-aware contextual findings from the root BOM target.
*
* These are intentionally conservative and only fire when there is explicit risk
* posture already present in the target metadata.
*
* @param {object} target audit target
* @returns {object[]} contextual findings
*/
export function buildTargetContextFindings(target) {
const findings = [];
const hasTrustedPublishing = hasTrustedPublishingProperties(
target?.properties,
);
const hasProvenanceEvidence = hasRegistryProvenanceEvidenceProperties(
target?.properties,
);
if (target.type === "npm") {
const hasInstallScript =
getTargetProperty(target, "cdx:npm:hasInstallScript") === "true";
const establishedPackage = isEstablishedPackage(target, "cdx:npm");
const recentRelease = isRecentRelease(target, "cdx:npm");
const publisherDrift = hasPublisherDrift(target, "cdx:npm");
const maintainerSetDrift = hasMaintainerSetDrift(target, "cdx:npm");
const partialMaintainerSetDrift = hasPartialIdentitySetDrift(
target,
"cdx:npm",
);
const dormantReleaseGapAnomaly = hasDormantReleaseGapAnomaly(
target,
"cdx:npm",
);
const compressedCadence = hasCompressedCadence(target, "cdx:npm");
if (
target.version &&
hasInstallScript &&
!hasTrustedPublishing &&
!hasProvenanceEvidence
) {
findings.push({
category: "package-integrity",
description:
"Install-time execution combined with missing registry-visible provenance raises future tampering risk.",
location: {
bomRef: target.bomRefs?.[0],
purl: target.purl,
},
message: `npm package '${target.name}@${target.version}' has install-time execution hooks but no registry-visible trusted publishing or provenance evidence.`,
mitigation:
"Prefer versions with registry-visible provenance evidence, review install scripts carefully, and pin/allowlist publishers for high-risk packages.",
ruleId: "PROV-001",
severity: "medium",
});
}
if (
target.version &&
establishedPackage &&
recentRelease &&
hasInstallScript &&
!hasTrustedPublishing &&
!hasProvenanceEvidence
) {
findings.push({
category: "package-integrity",
description:
"A very recent release on a mature package, combined with install-time execution and missing provenance, deserves extra scrutiny before adoption.",
location: {
bomRef: target.bomRefs?.[0],
purl: target.purl,
},
message: `npm package '${target.name}@${target.version}' is a very recent release on an established package and still lacks registry-visible provenance.`,
mitigation:
"Delay adoption briefly, verify publisher identity, and prefer registry-visible provenance for high-risk packages with install hooks.",
ruleId: "PROV-003",
severity: "low",
});
}
if (
target.version &&
establishedPackage &&
publisherDrift &&
hasInstallScript &&
!hasTrustedPublishing &&
!hasProvenanceEvidence
) {
findings.push({
category: "package-integrity",
description:
"Publisher drift on mature packages can be legitimate, but becomes more concerning when install-time execution is present and provenance is weak.",
location: {
bomRef: target.bomRefs?.[0],
purl: target.purl,
},
message: `npm package '${target.name}@${target.version}' was published by a different identity than the prior release and lacks registry-visible provenance.`,
mitigation:
"Review maintainer changes, compare the prior release publisher, and validate provenance before upgrading execution-capable packages.",
ruleId: "PROV-004",
severity: "medium",
});
}
if (
target.version &&
establishedPackage &&
maintainerSetDrift &&
hasInstallScript &&
!hasTrustedPublishing &&
!hasProvenanceEvidence
) {
findings.push({
category: "package-integrity",
description:
"Maintainer-set drift on execution-capable packages is a triage signal when the resolved release also lacks registry-visible provenance.",
location: {
bomRef: target.bomRefs?.[0],
purl: target.purl,
},
message: `npm package '${target.name}@${target.version}' has a fully different maintainer identity set than the prior release and lacks registry-visible provenance.`,
mitigation:
"Compare the prior and current maintainer sets, verify maintainer transitions, and prefer releases with provenance before upgrading packages with install hooks.",
ruleId: "PROV-007",
severity: "medium",
});
}
if (
target.version &&
establishedPackage &&
partialMaintainerSetDrift &&
!maintainerSetDrift &&
hasInstallScript &&
!hasTrustedPublishing &&
!hasProvenanceEvidence
) {
findings.push({
category: "package-integrity",
description:
"Partial maintainer-set drift is a low-severity triage signal when execution-capable releases retain some identities but also introduce maintainer churn without registry-visible provenance.",
location: {
bomRef: target.bomRefs?.[0],
purl: target.purl,
},
message: `npm package '${target.name}@${target.version}' retains only part of the prior maintainer identity set and lacks registry-visible provenance.`,
mitigation:
"Review which maintainer identities changed, compare against the prior release, and validate the transition before upgrading packages with install hooks.",
ruleId: "PROV-011",
severity: "low",
});
}
if (
target.version &&
establishedPackage &&
dormantReleaseGapAnomaly &&
hasInstallScript &&
!hasTrustedPublishing &&
!hasProvenanceEvidence
) {
findings.push({
category: "package-integrity",
description:
"A long dormant gap followed by a new execution-capable release can warrant a short review window when provenance is missing.",
location: {
bomRef: target.bomRefs?.[0],
purl: target.purl,
},
message: `npm package '${target.name}@${target.version}' arrived after an unusually long release gap and lacks registry-visible provenance.`,
mitigation:
"Review the release diff, compare against the prior version, and validate maintainer continuity before adopting after long dormancy.",
ruleId: "PROV-008",
severity: "low",
});
}
if (
target.version &&
establishedPackage &&
compressedCadence &&
hasInstallScript &&
!hasTrustedPublishing &&
!hasProvenanceEvidence
) {
findings.push({
category: "package-integrity",
description:
"A materially faster-than-usual release on a mature execution-capable package is a low-severity review signal when registry-visible provenance is absent.",
location: {
bomRef: target.bomRefs?.[0],
purl: target.purl,
},
message: `npm package '${target.name}@${target.version}' arrived materially faster than its prior release cadence and lacks registry-visible provenance.`,
mitigation:
"Review the release diff, compare the release timing against prior cadence, and validate the publisher transition before rapid upgrades of execution-capable packages.",
ruleId: "PROV-012",
severity: "low",
});
}
}
if (target.type === "pypi") {
const registry = getTargetProperty(target, "cdx:pypi:registry");
const isDefaultRegistry =
!registry ||
["https://pypi.org", "https://pypi.org/simple"].includes(registry);
const uploaderVerified =
getTargetProperty(target, "cdx:pypi:uploaderVerified") === "true";
const establishedPackage = isEstablishedPackage(target, "cdx:pypi");
const recentRelease = isRecentRelease(target, "cdx:pypi");
const publisherDrift = hasPublisherDrift(target, "cdx:pypi");
const maintainerSetDrift = hasMaintainerSetDrift(target, "cdx:pypi");
const partialMaintainerSetDrift = hasPartialIdentitySetDrift(
target,
"cdx:pypi",
);
const dormantReleaseGapAnomaly = hasDormantReleaseGapAnomaly(
target,
"cdx:pypi",
);
const compressedCadence = hasCompressedCadence(target, "cdx:pypi");
if (
target.version &&
isDefaultRegistry &&
!hasTrustedPublishing &&
!hasProvenanceEvidence &&
!uploaderVerified
) {
findings.push({
category: "package-integrity",
description:
"Default-registry PyPI packages without provenance or verified uploader context are weaker candidates for publisher-trust decisions.",
location: {
bomRef: target.bomRefs?.[0],
purl: target.purl,
},
message: `PyPI package '${target.name}@${target.version}' lacks registry-visible provenance and uploader verification signals.`,
mitigation:
"Prefer releases with provenance evidence or verified uploader metadata, especially for sensitive or newly introduced dependencies.",
ruleId: "PROV-002",
severity: "low",
});
}
if (
target.version &&
isDefaultRegistry &&
establishedPackage &&
recentRelease &&
!hasTrustedPublishing &&
!hasProvenanceEvidence &&
!uploaderVerified
) {
findings.push({
category: "package-integrity",
description:
"Very recent releases on mature packages can benefit from a short review window when provenance and uploader-verification signals are absent.",
location: {
bomRef: target.bomRefs?.[0],
purl: target.purl,
},
message: `PyPI package '${target.name}@${target.version}' is a very recent release on an established package without provenance or uploader verification signals.`,
mitigation:
"Delay adoption briefly, compare the release to the previous known-good version, and prefer verified/provenance-backed uploads for sensitive dependencies.",
ruleId: "PROV-005",
severity: "low",
});
}
if (
target.version &&
isDefaultRegistry &&
establishedPackage &&
publisherDrift &&
!hasTrustedPublishing &&
!hasProvenanceEvidence &&
!uploaderVerified
) {
findings.push({
category: "package-integrity",
description:
"Uploader drift on established PyPI packages is usually a triage signal, but becomes more meaningful when provenance and verification are missing.",
location: {
bomRef: target.bomRefs?.[0],
purl: target.purl,
},
message: `PyPI package '${target.name}@${target.version}' was uploaded by a different identity than the prior release and lacks provenance or uploader verification signals.`,
mitigation:
"Review the uploader change, compare the prior release uploader, and validate project ownership before upgrading critical dependencies.",
ruleId: "PROV-006",
severity: "low",
});
}
if (
target.version &&
isDefaultRegistry &&
establishedPackage &&
maintainerSetDrift &&
!hasTrustedPublishing &&
!hasProvenanceEvidence &&
!uploaderVerified
) {
findings.push({
category: "package-integrity",
description:
"Uploader-set drift on established PyPI packages is a triage signal when provenance and uploader verification are absent.",
location: {
bomRef: target.bomRefs?.[0],
purl: target.purl,
},
message: `PyPI package '${target.name}@${target.version}' has a fully different uploader identity set than the prior release and lacks provenance or uploader verification signals.`,
mitigation:
"Review uploader transitions, compare the prior release uploader set, and validate project ownership before upgrading sensitive dependencies.",
ruleId: "PROV-009",
severity: "low",
});
}
if (
target.version &&
isDefaultRegistry &&
establishedPackage &&
partialMaintainerSetDrift &&
!maintainerSetDrift &&
!hasTrustedPublishing &&
!hasProvenanceEvidence &&
!uploaderVerified
) {
findings.push({
category: "package-integrity",
description:
"Partial uploader-set drift is a low-severity review signal on established PyPI packages when provenance and uploader verification are absent.",
location: {
bomRef: target.bomRefs?.[0],
purl: target.purl,
},
message: `PyPI package '${target.name}@${target.version}' retains only part of the prior uploader identity set and lacks provenance or uploader verification signals.`,
mitigation:
"Review which uploader identities changed, compare the release against the prior version, and validate project ownership before upgrading sensitive dependencies.",
ruleId: "PROV-013",
severity: "low",
});
}
if (
target.version &&
isDefaultRegistry &&
establishedPackage &&
dormantReleaseGapAnomaly &&
!hasTrustedPublishing &&
!hasProvenanceEvidence &&
!uploaderVerified
) {
findings.push({
category: "package-integrity",
description:
"Established packages resurfacing after a long dormant gap benefit from extra review when provenance is weak.",
location: {
bomRef: target.bomRefs?.[0],
purl: target.purl,
},
message: `PyPI package '${target.name}@${target.version}' followed an unusually long release gap and lacks provenance or uploader verification signals.`,
mitigation:
"Compare the release to the prior known-good version and review maintainership continuity before adopting after long dormancy.",
ruleId: "PROV-010",
severity: "low",
});
}
if (
target.version &&
isDefaultRegistry &&
establishedPackage &&
compressedCadence &&
!hasTrustedPublishing &&
!hasProvenanceEvidence &&
!uploaderVerified
) {
findings.push({
category: "package-integrity",
description:
"Materially faster-than-usual release timing is a low-severity triage signal on mature PyPI packages when provenance and uploader verification remain weak.",
location: {
bomRef: target.bomRefs?.[0],
purl: target.purl,
},
message: `PyPI package '${target.name}@${target.version}' arrived materially faster than its prior release cadence and lacks provenance or uploader verification signals.`,
mitigation:
"Compare the release timing and contents against prior versions, then validate uploader continuity before rapid upgrades of sensitive dependencies.",
ruleId: "PROV-014",
severity: "low",
});
}
}
if (target.type === "cargo") {
const yanked = getTargetProperty(target, "cdx:cargo:yanked") === "true";
const establishedPackage = isEstablishedPackage(target, "cdx:cargo");
const recentRelease = isRecentRelease(target, "cdx:cargo");
const publisherDrift = hasPublisherDrift(target, "cdx:cargo");
const dormantReleaseGapAnomaly = hasDormantReleaseGapAnomaly(
target,
"cdx:cargo",
);
const compressedCadence = hasCompressedCadence(target, "cdx:cargo");
if (target.version && yanked) {
findings.push({
category: "package-integrity",
description:
"Yanked crates are removed from normal Cargo resolution and usually indicate a correctness, security, or publisher-action issue that deserves review before further adoption.",
location: {
bomRef: target.bomRefs?.[0],
purl: target.purl,
},
message: `Cargo crate '${target.name}@${target.version}' has been yanked from crates.io.`,
mitigation:
"Prefer a non-yanked release and review the crate's publisher history and changelog before upgrading.",
ruleId: "PROV-015",
severity: "high",
});
}
if (
target.version &&
establishedPackage &&
recentRelease &&
!hasTrustedPublishing &&
!hasProvenanceEvidence
) {
findings.push({
category: "package-integrity",
description:
"Very recent releases on established crates benefit from a short review window when trusted publishing and provenance remain weak.",
location: {
bomRef: target.bomRefs?.[0],
purl: target.purl,
},
message: `Cargo crate '${target.name}@${target.version}' is a very recent release on an established package without registry-visible provenance signals.`,
mitigation:
"Delay adoption briefly, compare the release to the prior version, and prefer trusted-publishing-backed releases for sensitive crates.",
ruleId: "PROV-016",
severity: "low",
});
}
if (
target.version &&
establishedPackage &&
publisherDrift &&
!hasTrustedPublishing &&
!hasProvenanceEvidence
) {
findings.push({
category: "package-integrity",
description:
"Publisher drift on established crates is often benign, but becomes more meaningful when provenance and trusted publishing are absent.",
location: {
bomRef: target.bomRefs?.[0],
purl: target.purl,
},
message: `Cargo crate '${target.name}@${target.version}' was published by a different identity than the prior release and lacks registry-visible provenance signals.`,
mitigation:
"Review the publisher transition, compare the prior release metadata, and validate ownership before upgrading sensitive crates.",
ruleId: "PROV-017",
severity: "medium",
});
}
if (
target.version &&
establishedPackage &&
(dormantReleaseGapAnomaly || compressedCadence) &&
!hasTrustedPublishing &&
!hasProvenanceEvidence
) {
findings.push({
category: "package-integrity",
description:
"Release timing anomalies on established crates are low-noise triage signals when provenance remains weak.",
location: {
bomRef: target.bomRefs?.[0],
purl: target.purl,
},
message: `Cargo crate '${target.name}@${target.version}' shows unusual release timing and lacks registry-visible provenance signals.`,
mitigation:
"Review the release diff and timing versus prior versions before rapidly adopting the new crate release.",
ruleId: "PROV-018",
severity: "low",
});
}
}
return findings;
}
/**
 * Shallow-clone a repository into the given directory.
 *
 * Runs `git clone --depth 1` through `hardenedGitCommand`, with the clone
 * alias, fsmonitor, hooks path and templates overridden by the arguments built
 * below. When a git ref is supplied it is passed via `--branch`.
 *
 * @param {string} repoUrl repository URL
 * @param {string} cloneDir target clone directory
 * @param {string | undefined} gitRef git ref to checkout
 * @returns {void}
 * @throws {Error} when git does not exit with status 0. The error carries
 * `retryable` (boolean) and `errorType` (`"network"` when retryable, otherwise
 * `"clone"`); `cause` is set when the command result exposes a spawn error.
 */
function cloneRepositoryToDir(repoUrl, cloneDir, gitRef) {
  const gitArgs = [
    "-c",
    "alias.clone=",
    "-c",
    "core.fsmonitor=false",
    "-c",
    "safe.bareRepository=explicit",
    "-c",
    "core.hooksPath=/dev/null",
    "clone",
    // `--branch` is only added when a ref was provided by the caller.
    ...(gitRef ? ["--branch", gitRef] : []),
    "--template=",
    repoUrl,
    "--depth",
    "1",
    cloneDir,
  ];
  const result = hardenedGitCommand(gitArgs);
  if (result.status === 0) {
    return;
  }
  const stderrText = result.stderr ? result.stderr.toString().trim() : "";
  // A process that never started or was killed on timeout reports the reason
  // on `error` rather than stderr (this is how child_process.spawnSync results
  // behave; assumed to be what hardenedGitCommand returns). Fold it in so the
  // failure is neither anonymous nor misclassified as non-retryable.
  const spawnErrorText = result.error?.message
    ? String(result.error.message).trim()
    : "";
  const details =
    [stderrText, spawnErrorText].filter(Boolean).join("\n") ||
    "unknown git clone error";
  const error = new Error(details);
  if (result.error) {
    error.cause = result.error;
  }
  // Transient transport problems are worth retrying; anything else is treated
  // as a permanent clone failure.
  error.retryable =
    /(timed out|unable to connect|could not resolve host|network is unreachable|connection reset|connection refused|temporary failure|remote end hung up unexpectedly|http 5\d\d|tls|econnreset|econnrefused|etimedout)/i.test(
      details,
    );
  error.errorType = error.retryable ? "network" : "clone";
  throw error;
}
/**
 * Clone a repository, retrying failures that are flagged as retryable.
 *
 * One initial attempt is made, followed by at most one retry per entry of
 * `CLONE_RETRY_DELAYS_MS`, sleeping for the matching delay before each retry.
 * The clone directory is removed after every failed attempt.
 *
 * @param {string} repoUrl repository URL
 * @param {string} cloneDir target clone directory
 * @param {string | undefined} gitRef git ref to checkout
 * @returns {Promise<void>} resolves once a clone attempt succeeds
 * @throws {Error} when no attempt succeeds. The message uses the sanitized
 * repository URL and the number of attempts actually made; `errorType` and
 * `retryable` are copied from the last failure, which is kept as `cause`.
 */
async function cloneRepositoryToDirWithRetry(repoUrl, cloneDir, gitRef) {
  const maxAttempts = CLONE_RETRY_DELAYS_MS.length + 1;
  let attemptsMade = 0;
  let lastError;
  while (attemptsMade < maxAttempts) {
    attemptsMade += 1;
    try {
      cloneRepositoryToDir(repoUrl, cloneDir, gitRef);
      return;
    } catch (error) {
      lastError = error;
      // Drop whatever the failed attempt left behind before retrying or giving up.
      safeRmSync(cloneDir, { force: true, recursive: true });
      if (!error?.retryable || attemptsMade >= maxAttempts) {
        break;
      }
      await sleep(CLONE_RETRY_DELAYS_MS[attemptsMade - 1]);
    }
  }
  const sanitizedRepoUrl = sanitizeRemoteUrlForLogs(repoUrl);
  const message = lastError?.message || "unknown git clone error";
  // Report the real attempt count: a non-retryable failure stops after one try.
  const error = new Error(
    `Unable to clone '${sanitizedRepoUrl}' after ${attemptsMade} attempt(s): ${message}`,
    { cause: lastError },
  );
  error.errorType = lastError?.errorType || "clone";
  error.retryable = Boolean(lastError?.retryable);
  throw error;
}
/**
 * Provide a checkout of the target repository, cloning only when needed.
 *
 * Without a workspace directory a fresh temporary directory is created and
 * cloned into; the result is flagged for cleanup. With a workspace directory
 * the checkout lives in a per-target sub-directory: one that already contains
 * `.git` is returned as-is, anything else found there is removed and cloned
 * again.
 *
 * @param {object} target audit target
 * @param {object} resolution resolved repository metadata
 * @param {string | undefined} workspaceDir workspace directory
 * @param {string | undefined} gitRef git ref to checkout
 * @returns {Promise<{ cleanup: boolean, cloneDir: string, reused: boolean }>} checkout info
 */
async function ensureCheckout(target, resolution, workspaceDir, gitRef) {
  const usesWorkspace = Boolean(workspaceDir);
  let checkoutDir;
  if (usesWorkspace) {
    const workspaceRoot = resolve(workspaceDir);
    if (!safeExistsSync(workspaceRoot)) {
      safeMkdirSync(workspaceRoot, { recursive: true });
    }
    checkoutDir = join(workspaceRoot, targetSlug(target));
    // An existing git checkout is reused without consulting the resolution.
    if (safeExistsSync(join(checkoutDir, ".git"))) {
      return { cleanup: false, cloneDir: checkoutDir, reused: true };
    }
    // A leftover directory without `.git` is discarded before cloning.
    if (safeExistsSync(checkoutDir)) {
      safeRmSync(checkoutDir, { force: true, recursive: true });
    }
  } else {
    const tempPrefix = join(getTmpDir(), `${targetSlug(target)}-`);
    checkoutDir = safeMkdtempSync(tempPrefix);
  }
  await cloneRepositoryToDirWithRetry(resolution.repoUrl, checkoutDir, gitRef);
  // Only throwaway temp checkouts are marked for cleanup.
  return { cleanup: !usesWorkspace, cloneDir: checkoutDir, reused: false };
}
/**
 * Read the declared package name from a Python packaging metadata file.
 *
 * The file is scanned with three patterns, in order: a quoted `name = "..."`
 * assignment at the start of a line, an unquoted `name = ...` assignment at the
 * start of a line, and a quoted `name=` keyword inside a `setup(...)` call.
 * The first pattern that yields a non-blank value wins.
 *
 * @param {string} filePath metadata file path
 * @returns {string | undefined} discovered package name, or undefined when the
 * file cannot be read or no pattern yields a name
 */
function readPythonPackageName(filePath) {
  let metadataText;
  try {
    metadataText = readFileSync(filePath, "utf8");
  } catch {
    // Missing or unreadable metadata simply yields no name.
    return undefined;
  }
  const namePatterns = [
    /(^|\n)\s*name\s*=\s*["']([^"'\n]+)["']/m,
    /(^|\n)\s*name\s*=\s*([^\n#]+)/m,
    /setup\s*\([^)]*name\s*=\s*["']([^"']+)["']/ms,
  ];
  for (const namePattern of namePatterns) {
    const captures = namePattern.exec(metadataText);
    if (captures === null) {
      continue;
    }
    // The line-anchored patterns carry the name in group 2; the setup() pattern
    // has a single group, so group 1 is the fallback.
    const rawName = [captures[2], captures[1]].find(Boolean) ?? "";
    const trimmedName = rawName.trim();
    if (trimmedName) {
      return trimmedName;
    }
  }
  return undefined;
}
/**
 * Locate the directory of a cloned repo whose Python metadata names the target.
 *
 * The tree is walked breadth-first from `cloneDir`, skipping directories listed
 * in `PYTHON_SKIP_DIRS` and silently ignoring unreadable ones. Every directory
 * holding a `PYTHON_METADATA_FILES` entry whose normalized package name equals
 * the normalized target name is a candidate; the candidate with the shortest
 * path is selected (earliest discovered on ties).
 *
 * @param {string} cloneDir cloned repository root
 * @param {object} target audit target
 * @returns {{ confidence: string, scanDir: string }} selected directory and
 * confidence: "low" with the clone root when nothing matched, "medium" when the
 * clone root itself matched, "high" for a matching sub-directory
 */
export function resolvePythonSourceDirectory(cloneDir, target) {
  const wantedName = normalizePackageName(target.name);
  const pendingDirs = [cloneDir];
  const matchingDirs = [];
  // Directories appended while walking are picked up by the growing cursor.
  for (let cursor = 0; cursor < pendingDirs.length; cursor += 1) {
    const dir = pendingDirs[cursor];
    let dirents;
    try {
      dirents = readdirSync(dir, { withFileTypes: true });
    } catch {
      continue;
    }
    for (const dirent of dirents) {
      const childPath = join(dir, dirent.name);
      if (dirent.isDirectory()) {
        if (!PYTHON_SKIP_DIRS.has(dirent.name)) {
          pendingDirs.push(childPath);
        }
        continue;
      }
      const isMetadataFile =
        dirent.isFile() && PYTHON_METADATA_FILES.includes(dirent.name);
      if (
        isMetadataFile &&
        normalizePackageName(readPythonPackageName(childPath)) === wantedName
      ) {
        matchingDirs.push(dir);
      }
    }
  }
  if (matchingDirs.length === 0) {
    return { confidence: "low", scanDir: cloneDir };
  }
  // Strict "<" keeps the earliest discovered directory among equal lengths.
  const shortestDir = matchingDirs.reduce((best, candidate) =>
    candidate.length < best.length ? candidate : best,
  );
  return {
    confidence: shortestDir === cloneDir ? "medium" : "high",
    scanDir: shortestDir,
  };
}
/**
 * Pick the directory inside a cloned repository that should be scanned.
 *
 * npm targets are resolved through `resolvePurlSourceDirectory`, pypi targets
 * through `resolvePythonSourceDirectory`; every other target type scans the
 * clone root with low confidence.
 *
 * @param {string} cloneDir cloned repository root
 * @param {object} target audit target
 * @param {object} resolution repository resolution metadata
 * @returns {{ confidence: string, scanDir: string }} selected directory and confidence
 */
export function resolveTargetSourceDirectory(cloneDir, target, resolution) {
  switch (target.type) {
    case "npm": {
      // Fall back to the clone root when no package directory was resolved.
      const scanDir = resolvePurlSourceDirectory(cloneDir, resolution) || cloneDir;
      // Landing on the clone root is less specific than a sub-directory hit.
      const confidence = scanDir === cloneDir ? "medium" : "high";
      return { confidence, scanDir };
    }
    case "pypi":
      return resolvePythonSourceDirectory(cloneDir, target);
    default:
      return { confidence: "low", scanDir: cloneDir };
  }
}
/**
 * Gather paths of Python files that the packaging heuristics should inspect.
 *
 * Walks `scanDir` breadth-first, skipping directories named in
 * `PYTHON_SKIP_DIRS` and ignoring unreadable ones, and collects regular files
 * whose name is in `PYTHON_HEURISTIC_FILENAMES`. Collection stops growing once
 * `PYTHON_HEURISTIC_FILE_LIMIT` paths have been gathered.
 *
 * @param {string} scanDir directory to walk
 * @returns {string[]} collected file paths in discovery order
 */
function collectPythonHeuristicFiles(scanDir) {
  const collected = [];
  const pendingDirs = [scanDir];
  const hasCapacity = () => collected.length < PYTHON_HEURISTIC_FILE_LIMIT;
  // Directories appended while walking are picked up by the growing cursor.
  for (
    let cursor = 0;
    cursor < pendingDirs.length && hasCapacity();
    cursor += 1
  ) {
    const dir = pendingDirs[cursor];
    let dirents;
    try {
      dirents = readdirSync(dir, { withFileTypes: true });
    } catch {
      continue;
    }
    for (const dirent of dirents) {
      const childPath = join(dir, dirent.name);
      if (dirent.isDirectory()) {
        if (!PYTHON_SKIP_DIRS.has(dirent.name)) {
          pendingDirs.push(childPath);
        }
        continue;
      }
      if (!dirent.isFile()) {
        continue;
      }
      if (!PYTHON_HEURISTIC_FILENAMES.has(dirent.name)) {
        continue;
      }
      if (hasCapacity()) {
        collected.push(childPath);
      }
    }
  }
  return collected;
}
/**
 * Scan one Python file for suspicious-content indicators.
 *
 * Files that cannot be stat'ed or read, or that are larger than
 * `PYTHON_HEURISTIC_MAX_FILE_BYTES`, are not inspected. The cmdclass pattern is
 * only evaluated for paths ending in `setup.py`.
 *
 * @param {string} filePath file to inspect
 * @returns {string[] | undefined} indicator labels in a fixed order, or
 * undefined when the file was not inspected or nothing matched
 */
function inspectPythonHeuristicFile(filePath) {
  let source;
  try {
    if (statSync(filePath).size > PYTHON_HEURISTIC_MAX_FILE_BYTES) {
      return undefined;
    }
    source = readFileSync(filePath, "utf8");
  } catch {
    return undefined;
  }
  // Probes run lazily, top to bottom, so the output order is stable.
  const probes = [
    [
      "process-or-dynamic-execution",
      () => PYTHON_EXECUTION_PATTERN.test(source),
    ],
    ["network-access", () => PYTHON_NETWORK_PATTERN.test(source)],
    ["encoded-loader", () => PYTHON_OBFUSCATION_PATTERN.test(source)],
    [
      "setup-cmdclass",
      () =>
        filePath.endsWith("setup.py") &&
        PYTHON_SETUP_CMDCLASS_PATTERN.test(source),
    ],
  ];
  const indicators = [];
  for (const [label, matches] of probes) {
    if (matches()) {
      indicators.push(label);
    }
  }
  return indicators.length > 0 ? indicators : undefined;
}
/**
* Build shallow predictive findings for suspicious Python packaging files.
*
* Phase 1 intentionally focuses on high-signal packaging surfaces (`setup.py`
* and package `__init__.py`) until deeper Python static analysis is added.
*
* @param {string} scanDir cloned repository scan directory
* @param {object} target audit target
* @returns {object[]} predictive findings
*/
export function buildPythonSourceHeuristicFindings(scanDir, target) {
if (!scanDir || target?.type !== "pypi") {
return [];
}
const findings = []