UNPKG

@cyclonedx/cdxgen

Version:

Creates CycloneDX Software Bill of Materials (SBOM) from source or container image

1,311 lines (1,264 loc) 36.6 kB
import fs from "node:fs"; import path from "node:path"; import process from "node:process"; import { URL } from "node:url"; import { PackageURL } from "packageurl-js"; import { coerce, diff, prerelease } from "semver"; import { thoughtLog } from "./logger.js"; import { cdxgenAgent, createDryRunError, DEBUG_MODE, fetchPomXmlAsJson, getTmpDir, hasDangerousUnicode, isDryRun, isSecureMode, isValidDriveRoot, isWin, recordActivity, safeMkdtempSync, safeRmSync, safeSpawnSync, } from "./utils.js"; export const PURL_REGISTRY_LOOKUP_WARNING = "Resolved repository URL from package registry metadata. This source can be inaccurate or malicious; review before trusting results."; export const SUPPORTED_PURL_SOURCE_TYPES = [ "npm", "pypi", "gem", "cargo", "pub", "github", "bitbucket", "maven", "composer", "generic", ]; const MAX_MONOREPO_PACKAGE_JSON_FILES = 2000; const MAX_MONOREPO_DIRECTORIES = 5000; const MAX_RELEASE_NOTE_RESOLVES = 50; /** * Build a scoped npm package name. * * @param {string|undefined} namespace package namespace * @param {string|undefined} name package name * @returns {string|undefined} scoped package name */ function buildScopedNpmPackageName(namespace, name) { if (!name) { return undefined; } if (!namespace) { return name; } return `${namespace.startsWith("@") ? namespace : `@${namespace}`}/${name}`; } /** * Validate git ref names used as branch/tag values. * * @param {string|undefined} refName branch or tag name * @returns {boolean} true if safe */ function isSafeGitRefName(refName) { if (!refName || typeof refName !== "string") { return false; } if (refName.startsWith("-")) { return false; } return /^[A-Za-z0-9._/@+-]+$/.test(refName); } function inferGitOperation(args = []) { for (let index = 0; index < args.length; index += 1) { const arg = args[index]; if (arg === "-c" || arg === "--config-env") { index += 1; continue; } if (typeof arg === "string" && arg.startsWith("-")) { continue; } return arg || "command"; } return "command"; } /** * Execute git with hardened defaults. * * @param {string[]} args git arguments * @param {Object} options command options * @param {string|undefined} options.cwd working directory * @returns {Object} spawn result */ export function hardenedGitCommand(args, options = {}) { const gitOperation = inferGitOperation(args); const gitAllowProtocol = getGitAllowProtocol(); const envConfigs = { GIT_CONFIG_COUNT: "2", GIT_CONFIG_KEY_0: "core.fsmonitor", GIT_CONFIG_VALUE_0: "false", GIT_CONFIG_KEY_1: "safe.bareRepository", GIT_CONFIG_VALUE_1: "explicit", GIT_TERMINAL_PROMPT: "0", }; const env = isSecureMode ? { ...process.env, ...envConfigs, GIT_CONFIG_NOSYSTEM: "1", GIT_CONFIG_GLOBAL: "/dev/null", GIT_ALLOW_PROTOCOL: gitAllowProtocol, } : { ...process.env, ...envConfigs, GIT_ALLOW_PROTOCOL: gitAllowProtocol, }; return safeSpawnSync("git", args, { cdxgenActivity: { blockedReason: `Dry run mode blocks git ${gitOperation} operations.`, gitOperation, kind: `git-${gitOperation}`, metadata: { capability: "git-operation", }, }, shell: false, cwd: options.cwd, env, }); } function normalizeTagName(tagName) { if (!tagName || typeof tagName !== "string") { return undefined; } return tagName .trim() .replace(/^refs\/tags\//, "") .replace(/\^\{\}$/, ""); } function releaseTypeFromTags(currentTag, previousTag) { const currentVersion = coerce(currentTag || ""); const previousVersion = coerce(previousTag || ""); if (!currentVersion || !previousVersion) { return "internal"; } if (prerelease(currentVersion.version)?.length) { return "pre-release"; } const versionDiff = diff(previousVersion.version, currentVersion.version); if (versionDiff === "major") { return "major"; } if (versionDiff === "minor") { return "minor"; } if (versionDiff === "patch") { return "patch"; } return "internal"; } function parseTagList(output) { return (output || "") .split("\n") .map((line) => normalizeTagName(line)) .filter((tagName) => isSafeGitRefName(tagName)) .filter(Boolean); } function parseLsRemoteTags(output) { const tags = []; for (const line of (output || "").split("\n")) { const ref = normalizeTagName(line.trim().split(/\s+/)[1]); if (ref && isSafeGitRefName(ref)) { tags.push(ref); } } return Array.from(new Set(tags)).sort((a, b) => { const av = coerce(a || ""); const bv = coerce(b || ""); if (av && bv) { return bv.compare(av); } return b.localeCompare(a); }); } function issueTypeFromCommitMessage(message) { const normalized = (message || "").toLowerCase(); if ( normalized.includes("security") || normalized.includes("cve-") || normalized.includes("vuln") ) { return "security"; } if ( normalized.startsWith("fix") || normalized.includes(" bug") || normalized.includes("defect") ) { return "defect"; } return "enhancement"; } /** * Build CycloneDX release notes from git tags and commits. * * @param {string|undefined} repoPath local repository path * @param {Object} options options carrying release notes hints * @returns {Object|undefined} releaseNotes object */ export function buildReleaseNotesFromGit(repoPath, options = {}) { let currentTag = normalizeTagName(options.releaseNotesCurrentTag); let previousTag = normalizeTagName(options.releaseNotesPreviousTag); let remoteUrl; let localRepoAvailable = false; if (repoPath && !maybeRemotePath(repoPath)) { const repoCheck = hardenedGitCommand( ["rev-parse", "--is-inside-work-tree"], { cwd: repoPath, }, ); localRepoAvailable = repoCheck.status === 0; if (localRepoAvailable) { const localTagsResult = hardenedGitCommand( ["tag", "--sort=-creatordate", "--merged", "HEAD"], { cwd: repoPath }, ); const localTags = parseTagList(localTagsResult.stdout); if (!currentTag && localTags.length) { currentTag = localTags[0]; } if (!previousTag && localTags.length > 1) { previousTag = localTags.find((t) => t !== currentTag); } const remoteResult = hardenedGitCommand( ["config", "--get", "remote.origin.url"], { cwd: repoPath }, ); if (remoteResult.status === 0 && remoteResult.stdout) { remoteUrl = remoteResult.stdout.toString().trim(); } } } remoteUrl = remoteUrl || options.releaseNotesGitUrl; const remoteUrlValidationError = typeof remoteUrl === "string" ? validateAndRejectGitSource(remoteUrl) : null; const canDiscoverRemoteTags = (!currentTag || !previousTag) && typeof remoteUrl === "string" && !remoteUrl.startsWith("-") && !remoteUrlValidationError && /github\.com[:/]/i.test(remoteUrl); if (canDiscoverRemoteTags) { const remoteTagsResult = hardenedGitCommand( [ "-c", "alias.ls-remote=", "-c", "core.fsmonitor=false", "-c", "safe.bareRepository=explicit", "-c", "core.hooksPath=/dev/null", "ls-remote", "--refs", "--tags", "--", remoteUrl, ], {}, ); const remoteTags = parseLsRemoteTags(remoteTagsResult.stdout); if (!currentTag && remoteTags.length) { currentTag = remoteTags[0]; } if (!previousTag && remoteTags.length > 1) { previousTag = remoteTags.find((t) => t !== currentTag); } } if (!currentTag) { return undefined; } if (!isSafeGitRefName(currentTag)) { return undefined; } if (previousTag && !isSafeGitRefName(previousTag)) { previousTag = undefined; } let timestamp; if (localRepoAvailable) { const tsResult = hardenedGitCommand( ["log", "-1", "--format=%cI", currentTag], { cwd: repoPath, }, ); if (tsResult.status === 0 && tsResult.stdout) { timestamp = tsResult.stdout.toString().trim(); } } if (!timestamp) { timestamp = new Date().toISOString(); } const resolves = []; if (localRepoAvailable && previousTag) { const logResult = hardenedGitCommand( ["log", "--pretty=format:%H%x09%s", `${previousTag}..${currentTag}`], { cwd: repoPath }, ); if (logResult.status === 0 && logResult.stdout) { // Keep the resolves list bounded to avoid excessive metadata growth. for (const line of logResult.stdout .toString() .split("\n") .filter(Boolean)) { const [sha, ...rest] = line.split("\t"); const message = rest.join("\t").trim(); if (!sha || !message) { continue; } resolves.push({ type: issueTypeFromCommitMessage(message), id: sha.substring(0, 12), name: message, description: message, }); } if (resolves.length > MAX_RELEASE_NOTE_RESOLVES) { resolves.length = MAX_RELEASE_NOTE_RESOLVES; } } } const tags = [currentTag]; if (previousTag && previousTag !== currentTag) { tags.push(previousTag); } return { type: releaseTypeFromTags(currentTag, previousTag), title: `Release notes for ${currentTag}`, description: previousTag ? `Changes between ${previousTag} and ${currentTag}.` : `Release notes for ${currentTag}.`, timestamp, tags, resolves, }; } /** * Return git allow protocol string from the environment variables. * * @returns {string} git allow protocol string */ export function getGitAllowProtocol() { return ( process.env.GIT_ALLOW_PROTOCOL || process.env.CDXGEN_GIT_ALLOW_PROTOCOL || process.env.CDXGEN_SERVER_GIT_ALLOW_PROTOCOL || (isSecureMode ? "https:ssh" : "https:git:ssh") ); } /** * Return configured allowed git hosts. * * @returns {string[]} list of configured hosts */ function getAllowedHosts() { const configuredHosts = process.env.CDXGEN_GIT_ALLOWED_HOSTS || process.env.CDXGEN_SERVER_ALLOWED_HOSTS || ""; return configuredHosts .split(",") .map((h) => h.trim()) .filter(Boolean); } /** * Checks the given hostname against the allowed list. * * @param {string} hostname Host name to check * @returns {boolean} true if the hostname in its entirety is allowed. false otherwise. */ export function isAllowedHost(hostname) { const allowedHosts = getAllowedHosts(); if (!allowedHosts.length) { return true; } if (hasDangerousUnicode(hostname)) { return false; } return allowedHosts.includes(hostname); } /** * Return configured allowed paths. * * @returns {string[]} list of configured paths */ function getAllowedPaths() { const configuredPaths = process.env.CDXGEN_ALLOWED_PATHS || process.env.CDXGEN_SERVER_ALLOWED_PATHS || ""; return configuredPaths .split(",") .map((p) => p.trim()) .filter(Boolean); } /** * Checks the given path string to belong to a drive in Windows. * * @param {string} p Path string to check * @returns {boolean} true if the windows path belongs to a drive. false otherwise (device names) */ export function isAllowedWinPath(p) { if (typeof p !== "string") { return false; } if (p === "") { return true; } if (hasDangerousUnicode(p)) { return false; } if (!isWin) { return true; } try { const normalized = path.normalize(p); if (hasDangerousUnicode(normalized)) { return false; } const { root } = path.parse(normalized); if (root === "\\") { return true; } if (root.startsWith("\\\\")) { return false; } return isValidDriveRoot(root); } catch (_err) { return false; } } /** * Checks the given path against the allowed list. * * @param {string} p Path string to check * @returns {boolean} true if the path is present in the allowed paths. false otherwise. */ export function isAllowedPath(p) { if (typeof p !== "string") { return false; } if (hasDangerousUnicode(p)) { return false; } const allowedPaths = getAllowedPaths(); if (!allowedPaths.length) { return true; } if (isWin && !isAllowedWinPath(p)) { return false; } return allowedPaths.some((ap) => { const resolvedP = path.resolve(p); const resolvedAp = path.resolve(ap); const relativePath = path.relative(resolvedAp, resolvedP); return ( relativePath === "" || (!relativePath.startsWith("..") && !path.isAbsolute(relativePath)) ); }); } /** * Determine if the path could be a package URL. * * @param {string} filePath Path or URL * @returns {boolean} true if the file path looks like a purl */ export function maybePurlSource(filePath) { return typeof filePath === "string" && filePath.startsWith("pkg:"); } /** * Determine if the file path could be a remote URL. * * @param {string} filePath The Git URL or local path * @returns {boolean} true if the file path is a remote URL. false otherwise. */ export function maybeRemotePath(filePath) { return /^[a-zA-Z0-9+.-]+:\/\//.test(filePath) || filePath.startsWith("git@"); } /** * Validates a given Git URL/Path against dangerous protocols and allowed hosts. * * @param {string} filePath The Git URL or local path * @returns {Object|null} Error object if invalid, or null if valid */ export function validateAndRejectGitSource(filePath) { if (/^(ext|fd)::/i.test(filePath)) { return { status: 400, error: "Invalid Protocol", details: "The provided protocol is not allowed.", }; } if (maybeRemotePath(filePath)) { let gitUrlObj; try { let urlToParse = filePath; if (filePath.startsWith("git@") && !filePath.includes("://")) { urlToParse = `ssh://${filePath.replace(":", "/")}`; } gitUrlObj = new URL(urlToParse); } catch (_err) { return { status: 400, error: "Invalid URL Format", details: "The provided Git URL is malformed.", }; } const gitAllowProtocol = getGitAllowProtocol(); const allowedSchemes = gitAllowProtocol .split(":") .filter(Boolean) .map((p) => `${p.toLowerCase()}:`); if ( allowedSchemes.includes("ssh:") && !allowedSchemes.includes("git+ssh:") ) { allowedSchemes.push("git+ssh:"); } if (!allowedSchemes.includes(gitUrlObj.protocol)) { return { status: 400, error: "Protocol Not Allowed", details: `The protocol '${gitUrlObj.protocol}' is not permitted by GIT_ALLOW_PROTOCOL.`, }; } if (gitUrlObj.href.includes("::")) { return { status: 400, error: "Invalid URL Syntax", details: "Git remote helper syntax (::) is not allowed.", }; } if (!isAllowedHost(gitUrlObj.hostname)) { return { status: 403, error: "Host Not Allowed", details: "The Git URL host is not allowed as per the allowlist.", }; } } return null; } /** * Clone a git repository into a temporary directory. * * @param {string} repoUrl Repository URL * @param {string|string[]|null} branch Branch name * @returns {string} cloned directory path */ export function gitClone(repoUrl, branch = null) { let baseDirName = path.basename(repoUrl); if (!/^[a-zA-Z0-9_-]+$/.test(baseDirName)) { baseDirName = "repo-"; } if (isDryRun) { const error = createDryRunError( "clone", repoUrl, "Dry run mode blocks repository cloning.", ); recordActivity({ kind: "clone", reason: error.message, status: "blocked", target: repoUrl, }); return path.join(getTmpDir(), `${baseDirName}${path.sep}dry-run-clone`); } const tempDir = safeMkdtempSync(path.join(getTmpDir(), baseDirName)); const gitArgs = [ "-c", "alias.clone=", "-c", "core.fsmonitor=false", "-c", "safe.bareRepository=explicit", "-c", "core.hooksPath=/dev/null", "clone", "--template=", repoUrl, "--depth", "1", tempDir, ]; if (branch) { const firstBranchStr = Array.isArray(branch) ? branch[0] : String(branch); if (!isSafeGitRefName(firstBranchStr)) { console.warn("Skipping branch clone: invalid branch name"); } else { const cloneIndex = gitArgs.indexOf("clone"); gitArgs.splice(cloneIndex + 1, 0, "--branch", firstBranchStr); } } thoughtLog( `Cloning Repo${branch ? ` with branch ${branch}` : ""} to ${tempDir}`, ); const result = hardenedGitCommand(gitArgs); if (result.status !== 0) { console.error(result.stderr); } return tempDir; } /** * Sanitize remote URL for logging. * * @param {string|undefined} remoteUrl Repository URL * @returns {string|undefined} sanitized URL */ export function sanitizeRemoteUrlForLogs(remoteUrl) { if (!remoteUrl || typeof remoteUrl !== "string") { return remoteUrl; } try { const parsed = new URL(remoteUrl); if (parsed.username || parsed.password) { parsed.username = "***"; parsed.password = "***"; } return parsed.toString(); } catch (_err) { return remoteUrl; } } /** * Find a matching git ref for a package version. * * @param {string} repoUrl Repository URL * @param {Object|undefined} purlResolution purl resolution metadata * @returns {string|undefined} matching tag or branch reference */ export function findGitRefForPurlVersion(repoUrl, purlResolution) { const packageVersion = purlResolution?.version; if (!packageVersion) { return undefined; } const purlType = purlResolution?.type; const purlNamespace = purlResolution?.namespace; const purlName = purlResolution?.name; const refCandidates = [packageVersion, `v${packageVersion}`]; if (purlType === "npm" && purlName) { const scopedName = buildScopedNpmPackageName(purlNamespace, purlName); refCandidates.push( `${purlName}@${packageVersion}`, `${scopedName}@${packageVersion}`, `${purlName}-v${packageVersion}`, `${scopedName}-v${packageVersion}`, ); } const filteredCandidates = [...new Set(refCandidates)].filter((candidate) => isSafeGitRefName(candidate), ); if (!repoUrl || repoUrl.startsWith("-")) { return undefined; } if (validateAndRejectGitSource(repoUrl)) { return undefined; } const result = hardenedGitCommand( [ "-c", "alias.ls-remote=", "-c", "core.fsmonitor=false", "-c", "safe.bareRepository=explicit", "-c", "core.hooksPath=/dev/null", "ls-remote", "--refs", "--tags", "--heads", "--", repoUrl, ], {}, ); if (result.status !== 0 || !result.stdout) { return undefined; } const availableRefs = result.stdout .split("\n") .map((line) => line.trim().split(/\s+/)[1]) .filter(Boolean) .map((ref) => ref.replace(/^refs\/(?:tags|heads)\//, "")); for (const candidate of filteredCandidates) { if (availableRefs.includes(candidate)) { return candidate; } } return undefined; } /** * Find the best source directory for purl-based npm monorepo scans. * * @param {string} srcDir cloned source directory * @param {Object|undefined} purlResolution purl resolution metadata * @returns {string|undefined} preferred source directory */ export function resolvePurlSourceDirectory(srcDir, purlResolution) { if (purlResolution?.type !== "npm" || !purlResolution?.name || !srcDir) { return undefined; } const purlNamespace = purlResolution?.namespace; const packageNameCandidates = [purlResolution.name]; if (purlNamespace) { packageNameCandidates.push( `${purlNamespace}/${purlResolution.name}`, buildScopedNpmPackageName(purlNamespace, purlResolution.name), ); } const uniquePackageNameCandidates = [ ...new Set(packageNameCandidates.filter(Boolean)), ]; const skipDirectories = new Set([ ".git", ".idea", ".vscode", "build", "dist", "node_modules", "out", "target", "vendor", ]); const queue = [srcDir]; const matches = new Set(); let packageJsonCount = 0; let currentIndex = 0; while ( currentIndex < queue.length && packageJsonCount < MAX_MONOREPO_PACKAGE_JSON_FILES ) { if (currentIndex >= MAX_MONOREPO_DIRECTORIES) { break; } const currentDir = queue[currentIndex]; currentIndex += 1; if (!currentDir) { continue; } let entries = []; try { entries = fs.readdirSync(currentDir, { withFileTypes: true, }); } catch (_err) { continue; } for (const entry of entries) { if (entry.isDirectory()) { if (skipDirectories.has(entry.name)) { continue; } queue.push(path.join(currentDir, entry.name)); continue; } if (!entry.isFile() || entry.name !== "package.json") { continue; } packageJsonCount += 1; const packageJsonPath = path.join(currentDir, entry.name); try { const pkgJson = JSON.parse(fs.readFileSync(packageJsonPath, "utf-8")); if (uniquePackageNameCandidates.includes(pkgJson?.name)) { matches.add(currentDir); } } catch (_err) { // Ignore invalid package.json files in monorepos. } } } if (!matches.size) { return undefined; } const dedupedMatches = [...matches]; dedupedMatches.sort((a, b) => a.length - b.length); return dedupedMatches[0]; } /** * Normalize repository URL values from registry metadata to a cloneable URL. * * @param {string|undefined} candidateUrl raw URL candidate * @returns {string|undefined} normalized URL */ function normalizeRepositoryUrl(candidateUrl) { if (!candidateUrl || typeof candidateUrl !== "string") { return undefined; } let repoUrl = candidateUrl.trim(); if (!repoUrl) { return undefined; } if (/^git\s+/.test(repoUrl)) { repoUrl = repoUrl.replace(/^git\s+/, "git+"); } if (repoUrl.startsWith("git+")) { repoUrl = repoUrl.slice(4); } if (repoUrl.startsWith("scm:git:")) { repoUrl = repoUrl.slice(8); } if (repoUrl.startsWith("github:")) { repoUrl = `https://github.com/${repoUrl.slice("github:".length)}`; } if (repoUrl.startsWith("gitlab:")) { repoUrl = `https://gitlab.com/${repoUrl.slice("gitlab:".length)}`; } if (repoUrl.startsWith("bitbucket:")) { repoUrl = `https://bitbucket.org/${repoUrl.slice("bitbucket:".length)}`; } if ( !repoUrl.includes("://") && /^(?:[^/]+\.)?github\.com\/.+/.test(repoUrl) ) { repoUrl = `https://${repoUrl}`; } if (!repoUrl.includes("://") && repoUrl.startsWith("www.")) { repoUrl = `https://${repoUrl}`; } const hashIndex = repoUrl.indexOf("#"); if (hashIndex > -1) { repoUrl = repoUrl.slice(0, hashIndex); } return repoUrl; } /** * Normalize repository URL values represented as string/object metadata fields. * * @param {string|Object|undefined} candidate repository field value * @returns {string|undefined} normalized URL */ function normalizeRepositoryObject(candidate) { if (!candidate) { return undefined; } if (typeof candidate === "string") { return normalizeRepositoryUrl(candidate); } if (typeof candidate === "object") { return normalizeRepositoryUrl(candidate.url); } return undefined; } /** * Build a cloneable repository URL from known download URL patterns. * * @param {string|undefined} candidateUrl raw download URL * @returns {string|undefined} normalized repository URL */ function normalizeDownloadRepositoryUrl(candidateUrl) { const normalized = normalizeRepositoryUrl(candidateUrl); if (!normalized) { return undefined; } let parsed; try { parsed = new URL(normalized); } catch (_err) { return undefined; } const segments = parsed.pathname.split("/").filter(Boolean); if (parsed.hostname === "github.com" && segments.length >= 2) { return `https://github.com/${segments[0]}/${segments[1]}`; } if (parsed.hostname === "codeload.github.com" && segments.length >= 2) { return `https://github.com/${segments[0]}/${segments[1]}`; } if (parsed.hostname === "gitlab.com" && segments.length >= 2) { return `https://gitlab.com/${segments[0]}/${segments[1]}`; } if (normalized.endsWith(".git")) { return normalized; } return undefined; } /** * Build package name key for registry lookup from a parsed package URL. * * @param {PackageURL} purlObj parsed package URL object * @returns {string} package name suitable for registry API lookup */ function packageNameForLookup(purlObj) { const namespace = purlObj.namespace; if (!namespace) { return purlObj.name; } return `${namespace}/${purlObj.name}`; } /** * Validate package URL source input and return an error object when invalid. * * @param {string} purlString package URL string * @returns {{status:number,error:string,details:string}|null} validation error or null */ export function validatePurlSource(purlString) { if (!maybePurlSource(purlString)) { return null; } let purlObj; try { purlObj = PackageURL.fromString(purlString); } catch (_err) { return { status: 400, error: "Invalid purl source", details: "The provided package URL is malformed.", }; } const purlType = purlObj?.type; if (!SUPPORTED_PURL_SOURCE_TYPES.includes(purlType)) { return { status: 400, error: "Unsupported purl source type", details: `Supported purl types for automatic git URL detection: ${SUPPORTED_PURL_SOURCE_TYPES.join(", ")}.`, }; } if (!purlObj?.name) { return { status: 400, error: "Invalid purl source", details: "The provided package URL does not include a package name.", }; } const purlQualifiers = purlObj?.qualifiers || {}; if ( ["github", "bitbucket", "maven", "composer"].includes(purlType) && !purlObj?.namespace ) { return { status: 400, error: "Invalid purl source", details: `The provided ${purlType} package URL must include a namespace.`, }; } if (purlType === "maven" && !purlObj?.version) { return { status: 400, error: "Invalid purl source", details: "The provided maven package URL must include a version.", }; } if ( purlType === "generic" && !purlQualifiers.vcs_url && !purlQualifiers.download_url ) { return { status: 400, error: "Unsupported generic purl source", details: "generic purl sources must include a vcs_url or download_url qualifier.", }; } return null; } /** * Resolve a git repository URL from a package URL by querying package registries. * * Supported purl types: * - npm -> registry.npmjs.org * - pypi -> pypi.org * - gem -> rubygems.org * - cargo -> crates.io * - pub -> pub.dev * - github -> github.com/{namespace}/{name} * - bitbucket -> bitbucket.org/{namespace}/{name} * - maven -> repo1.maven.org POM scm metadata * - composer -> repo.packagist.org p2 metadata * - generic -> qualifiers: vcs_url, download_url * * @param {string} purlString package URL string * @returns {Promise<{repoUrl:string|undefined, registry:string|undefined, type:string}|undefined>} resolution result */ export async function resolveGitUrlFromPurl(purlString) { if (!maybePurlSource(purlString)) { return undefined; } const validationError = validatePurlSource(purlString); if (validationError) { return undefined; } let purlObj; try { purlObj = PackageURL.fromString(purlString); } catch (err) { if (DEBUG_MODE) { const errorMessage = err?.message || String(err); thoughtLog("Unable to resolve repository URL for purl.", { purlString, errorMessage, }); } return undefined; } if (!purlObj?.type || !purlObj?.name) { return undefined; } const packageName = packageNameForLookup(purlObj); const packageVersion = purlObj.version; let repoUrl; let registry; const logPurlResolutionError = (err) => { const errorMessage = err?.message || String(err); const errorDetails = []; if (err?.code) { errorDetails.push(`code=${err.code}`); } if (typeof err?.statusCode === "number") { errorDetails.push(`status=${err.statusCode}`); } if (err?.hostname) { errorDetails.push(`host=${err.hostname}`); } const sanitizedRegistry = sanitizeRemoteUrlForLogs(registry); console.error( `Unable to resolve repository URL for purl '${purlString}'${sanitizedRegistry ? ` using registry '${sanitizedRegistry}'` : ""}: ${errorMessage}${errorDetails.length ? ` (${errorDetails.join(", ")})` : ""}`, ); if (DEBUG_MODE) { thoughtLog("Unable to resolve repository URL for purl.", { purlString, errorMessage, errorDetails, registry: sanitizedRegistry, }); } }; try { switch (purlObj.type) { case "npm": { registry = process.env.NPM_URL || "https://registry.npmjs.org/"; const res = await cdxgenAgent.get(`${registry}${packageName}`, { responseType: "json", }); const body = res.body; const versionBody = packageVersion ? body.versions?.[packageVersion] : undefined; repoUrl = normalizeRepositoryObject(versionBody?.repository) || normalizeRepositoryObject(body.repository) || normalizeRepositoryUrl(versionBody?.homepage) || normalizeRepositoryUrl(body.homepage); break; } case "pypi": { registry = process.env.PYPI_URL || "https://pypi.org/pypi/"; const suffix = packageVersion ? `${purlObj.name}/${packageVersion}/json` : `${purlObj.name}/json`; const res = await cdxgenAgent.get(`${registry}${suffix}`, { responseType: "json", }); const info = res.body?.info || {}; const projectUrls = info.project_urls || {}; repoUrl = normalizeRepositoryUrl(projectUrls.Source) || normalizeRepositoryUrl(projectUrls.Repository) || normalizeRepositoryUrl(projectUrls["Source Code"]) || normalizeRepositoryUrl(projectUrls.Code) || normalizeRepositoryUrl(info.home_page); break; } case "gem": { const v1Url = process.env.RUBYGEMS_V1_URL || "https://rubygems.org/api/v1/gems/"; const v2Url = process.env.RUBYGEMS_V2_URL || "https://rubygems.org/api/v2/rubygems/"; registry = packageVersion ? v2Url : v1Url; const endpoint = packageVersion ? `${v2Url}${purlObj.name}/versions/${packageVersion}.json` : `${v1Url}${purlObj.name}.json`; const res = await cdxgenAgent.get(endpoint, { responseType: "json", }); const body = Array.isArray(res.body) ? res.body[0] : res.body; repoUrl = normalizeRepositoryUrl(body?.metadata?.source_code_uri) || normalizeRepositoryUrl(body?.source_code_uri) || normalizeRepositoryUrl(body?.homepage_uri); break; } case "cargo": { registry = process.env.RUST_CRATES_URL || "https://crates.io/api/v1/crates/"; const res = await cdxgenAgent.get(`${registry}${purlObj.name}`, { responseType: "json", }); repoUrl = normalizeRepositoryUrl(res.body?.crate?.repository); break; } case "pub": { registry = process.env.PUB_DEV_URL || "https://pub.dev"; const endpoint = packageVersion ? `${registry}/api/packages/${purlObj.name}/versions/${packageVersion}` : `${registry}/api/packages/${purlObj.name}`; const res = await cdxgenAgent.get(endpoint, { responseType: "json", headers: { Accept: "application/vnd.pub.v2+json", }, }); const pubspec = res.body?.pubspec || res.body?.latest?.pubspec || {}; repoUrl = normalizeRepositoryUrl(pubspec.repository) || normalizeRepositoryUrl(pubspec.homepage); break; } case "github": { registry = "https://github.com"; if (purlObj.namespace) { repoUrl = normalizeRepositoryUrl( `${registry}/${purlObj.namespace}/${purlObj.name}`, ); } break; } case "bitbucket": { registry = "https://bitbucket.org"; if (purlObj.namespace) { repoUrl = normalizeRepositoryUrl( `${registry}/${purlObj.namespace}/${purlObj.name}`, ); } break; } case "maven": { if (!purlObj.namespace || !packageVersion) { break; } const mavenCentralUrl = process.env.MAVEN_CENTRAL_URL || "https://repo1.maven.org/maven2/"; registry = mavenCentralUrl.endsWith("/") ? mavenCentralUrl : `${mavenCentralUrl}/`; const pomJson = await fetchPomXmlAsJson({ urlPrefix: registry, group: purlObj.namespace, name: purlObj.name, version: packageVersion, }); repoUrl = normalizeRepositoryUrl(pomJson?.scm?.url?._) || normalizeRepositoryUrl(pomJson?.scm?.connection?._) || normalizeRepositoryUrl(pomJson?.scm?.developerConnection?._); break; } case "composer": { const packagistUrl = process.env.PACKAGIST_URL || "https://repo.packagist.org/p2/"; registry = packagistUrl.endsWith("/") ? packagistUrl : `${packagistUrl}/`; const endpoint = `${registry}${packageName}.json`; const res = await cdxgenAgent.get(endpoint, { responseType: "json", }); const packageVersions = res.body?.packages?.[packageName]; if (!Array.isArray(packageVersions) || !packageVersions.length) { break; } const selectedVersion = packageVersion ? packageVersions.find( (v) => v?.version === packageVersion || v?.version_normalized === packageVersion, ) : packageVersions[0]; repoUrl = normalizeRepositoryUrl(selectedVersion?.source?.url) || normalizeRepositoryUrl(selectedVersion?.homepage); break; } case "generic": { const genericVcsUrl = purlObj.qualifiers?.vcs_url; const genericDownloadUrl = purlObj.qualifiers?.download_url; repoUrl = normalizeRepositoryUrl(genericVcsUrl) || normalizeDownloadRepositoryUrl(genericDownloadUrl); break; } default: return undefined; } } catch (err) { logPurlResolutionError(err); return undefined; } if (!repoUrl) { return undefined; } if (!maybeRemotePath(repoUrl)) { if (DEBUG_MODE) { console.log( `Ignoring non-remote repository URL '${repoUrl}' from purl lookup.`, ); } return undefined; } return { type: purlObj.type, registry, repoUrl, version: purlObj.version, namespace: purlObj.namespace, name: purlObj.name, }; } /** * Clean up cloned source directories. * * @param {string} srcDir directory path to remove */ export function cleanupSourceDir(srcDir) { const normalizePath = (candidatePath) => { if (!candidatePath) { return undefined; } try { return fs.realpathSync.native ? fs.realpathSync.native(candidatePath) : fs.realpathSync(candidatePath); } catch { return path.resolve(candidatePath); } }; const normalizedSrcDir = normalizePath(srcDir); const tempRoots = [getTmpDir()]; if (process.platform !== "win32") { tempRoots.push("/tmp"); tempRoots.push("/private/tmp"); } const normalizedTmpDirs = tempRoots .map((candidatePath) => normalizePath(candidatePath)) .filter(Boolean); if ( normalizedSrcDir && normalizedTmpDirs.some( (normalizedTmpDir) => normalizedSrcDir === normalizedTmpDir || normalizedSrcDir.startsWith(`${normalizedTmpDir}${path.sep}`), ) && fs.rmSync ) { thoughtLog(`Cleaning up ${srcDir}`); safeRmSync(srcDir, { recursive: true, force: true }); } }