UNPKG

@cyclonedx/cdxgen

Version:

Creates CycloneDX Software Bill of Materials (SBOM) from source or container image

1,729 lines (1,660 loc) 467 kB
import { Buffer } from "node:buffer";
import { spawnSync } from "node:child_process";
import { createHash, randomUUID } from "node:crypto";
import {
  constants,
  chmodSync,
  copyFileSync,
  createReadStream,
  existsSync,
  lstatSync,
  mkdirSync,
  mkdtempSync,
  readFileSync,
  rmSync,
  unlinkSync,
  writeFileSync,
} from "node:fs";
import { homedir, platform, tmpdir } from "node:os";
import path, {
  basename,
  delimiter as _delimiter,
  dirname,
  extname,
  join,
  resolve,
  relative,
  sep as _sep,
} from "node:path";
import process from "node:process";
import { URL, fileURLToPath } from "node:url";
import toml from "@iarna/toml";
import Arborist from "@npmcli/arborist";
import { load } from "cheerio";
import { parseEDNString } from "edn-data";
import { globSync } from "glob";
import got from "got";
import iconv from "iconv-lite";
import { load as _load } from "js-yaml";
import StreamZip from "node-stream-zip";
import { PackageURL } from "packageurl-js";
import propertiesReader from "properties-reader";
import {
  clean,
  coerce,
  compare,
  maxSatisfying,
  parse,
  satisfies,
  valid,
} from "semver";
import { IriValidationStrategy, validateIri } from "validate-iri";
import { xml2js } from "xml-js";
import { getTreeWithPlugin } from "../managers/piptree.js";
import { thoughtLog } from "./logger.js";

// Normalise the module URL so that fileURLToPath() below always receives a file:// URL.
let url = import.meta.url;
if (!url.startsWith("file://")) {
  url = new URL(`file://${import.meta.url}`).toString();
}
// TODO: verify if this is a good method (Prabhu)
// this is due to dirNameStr being "cdxgen/lib/helpers" which causes errors
// Three dirname() calls walk up from this file to the directory that holds
// "package.json" and the "data" directory read further below.
// NOTE(review): import.meta should always be truthy inside an ES module, so the
// __dirname fallback looks unreachable - confirm before relying on it.
export const dirNameStr = import.meta
  ? dirname(dirname(dirname(fileURLToPath(url))))
  : __dirname;

// Secure mode is on when CDXGEN_SECURE_MODE is "true"/"1" or when NODE_OPTIONS
// contains "--permission".
export const isSecureMode =
  ["true", "1"].includes(process.env?.CDXGEN_SECURE_MODE) ||
  process.env?.NODE_OPTIONS?.includes("--permission");

// Platform flags derived from os.platform()
export const isWin = platform() === "win32";
export const isMac = platform() === "darwin";

// Per-platform ".atomdb" path underneath the home directory of the current user
export let ATOM_DB = join(homedir(), ".local", "share", ".atomdb");
if (isWin) {
  ATOM_DB = join(homedir(), "AppData", "Local", ".atomdb");
} else if (isMac) {
  ATOM_DB = join(homedir(), "Library", "Application Support", ".atomdb");
}

/**
 * Safely check if a file path exists without crashing due to a lack of permissions.
 *
 * In secure mode, when process.permission is available, the "fs.read" permission
 * is checked for the path first; if it is missing the function returns false
 * without touching the file system.
 *
 * @param {String} filePath File path
 * @returns {Boolean} True if the path exists. False otherwise
 */
export function safeExistsSync(filePath) {
  if (isSecureMode && process.permission) {
    if (!process.permission.has("fs.read", join(filePath, "", "*"))) {
      if (DEBUG_MODE) {
        console.log(`cdxgen lacks read permission for: ${filePath}`);
      }
      return false;
    }
  }
  return existsSync(filePath);
}

/**
 * Safely create a directory without crashing due to a lack of permissions.
 *
 * In secure mode, when process.permission is available, the "fs.write" permission
 * is checked for the path first; if it is missing nothing is created.
 *
 * @param {String} filePath File path
 * @param {Object} options mkdir options, passed through to mkdirSync
 * @returns {String|undefined} The value returned by mkdirSync, or undefined when
 * the write permission is missing
 */
export function safeMkdirSync(filePath, options) {
  if (isSecureMode && process.permission) {
    if (!process.permission.has("fs.write", join(filePath, "", "*"))) {
      if (DEBUG_MODE) {
        console.log(`cdxgen lacks write permission for: ${filePath}`);
      }
      return undefined;
    }
  }
  return mkdirSync(filePath, options);
}

// Static data files, read synchronously from the "data" directory at module load.
const licenseMapping = JSON.parse(
  readFileSync(join(dirNameStr, "data", "lic-mapping.json"), "utf-8"),
);
const vendorAliases = JSON.parse(
  readFileSync(join(dirNameStr, "data", "vendor-alias.json"), "utf-8"),
);
const spdxLicenses = JSON.parse(
  readFileSync(join(dirNameStr, "data", "spdx-licenses.json"), "utf-8"),
);
const knownLicenses = JSON.parse(
  readFileSync(join(dirNameStr, "data", "known-licenses.json"), "utf-8"),
);
const mesonWrapDB = JSON.parse(
  readFileSync(join(dirNameStr, "data", "wrapdb-releases.json"), "utf-8"),
);
export const frameworksList = JSON.parse(
  readFileSync(join(dirNameStr, "data", "frameworks-list.json"), "utf-8"),
);
// package.json found at dirNameStr; only its version is used here
const selfPJson = JSON.parse(
  readFileSync(join(dirNameStr, "package.json"), "utf-8"),
);
const CPP_STD_MODULES = JSON.parse(
  readFileSync(join(dirNameStr, "data", "glibc-stdlib.json"), "utf-8"),
);
const _version = selfPJson.version;
// Refer to contrib/py-modules.py for a script to generate this list
// The script needs to be used once every few months to update this list
const PYTHON_STD_MODULES = JSON.parse(
  readFileSync(join(dirNameStr, "data", "python-stdlib.json"), "utf-8"),
);
// Mapping between modules and package names
const PYPI_MODULE_PACKAGE_MAPPING = JSON.parse(
  readFileSync(join(dirNameStr, "data", "pypi-pkg-aliases.json"), "utf-8"),
);
// FIXME. This has to get removed, once we improve the module detection one-liner.
// If you're a Rubyist, please help us improve this code.
const RUBY_KNOWN_MODULES = JSON.parse(
  readFileSync(join(dirNameStr, "data", "ruby-known-modules.json"), "utf-8"),
);

// Debug mode flag
export const DEBUG_MODE =
  ["debug", "verbose"].includes(process.env.CDXGEN_DEBUG_MODE) ||
  process.env.SCAN_DEBUG_MODE === "debug" ||
  process.env.NODE_ENV === "development";

// Timeout milliseconds. Default 20 mins
// The radix is explicit so that the value is always read as a decimal number.
export const TIMEOUT_MS =
  Number.parseInt(process.env.CDXGEN_TIMEOUT_MS, 10) || 20 * 60 * 1000;

// Max buffer for stdout and stderr. Defaults to 100MB
export const MAX_BUFFER =
  Number.parseInt(process.env.CDXGEN_MAX_BUFFER, 10) || 100 * 1024 * 1024;

// Metadata cache
export let metadata_cache = {};

// Speed up lookup namespaces for a given jar
const jarNSMapping_cache = {};

// Temporary files written by cdxgen, will be removed on exit
const temporaryFiles = new Set();
process.on("exit", () => {
  for (const tempFile of temporaryFiles) {
    // Cleanup is best-effort: a file that cannot be removed must neither stop
    // the removal of the remaining files nor throw while the process exits.
    try {
      if (existsSync(tempFile)) {
        unlinkSync(tempFile);
      }
    } catch (err) {
      if (DEBUG_MODE) {
        console.error(`Unable to remove the temporary file ${tempFile}`, err);
      }
    }
  }
});

// Whether test scope shall be included for java/maven projects; default, if unset shall be 'true'
export const includeMavenTestScope =
  !process.env.CDX_MAVEN_INCLUDE_TEST_SCOPE ||
  ["true", "1"].includes(process.env.CDX_MAVEN_INCLUDE_TEST_SCOPE);

// Whether to use the native maven dependency tree command. Defaults to true.
export const PREFER_MAVEN_DEPS_TREE = !["false", "0"].includes( process.env?.PREFER_MAVEN_DEPS_TREE, ); export function shouldFetchLicense() { return ( process.env.FETCH_LICENSE && ["true", "1"].includes(process.env.FETCH_LICENSE) ); } export function shouldFetchVCS() { return ( process.env.GO_FETCH_VCS && ["true", "1"].includes(process.env.GO_FETCH_VCS) ); } // Whether license information should be fetched export const FETCH_LICENSE = shouldFetchLicense(); // Whether search.maven.org will be used to identify jars without maven metadata; default, if unset shall be 'true' export const SEARCH_MAVEN_ORG = !process.env.SEARCH_MAVEN_ORG || ["true", "1"].includes(process.env.SEARCH_MAVEN_ORG); // circuit breaker for search maven.org let search_maven_org_errors = 0; const MAX_SEARCH_MAVEN_ORG_ERRORS = 1; // circuit breaker for get repo license let get_repo_license_errors = 0; const MAX_GET_REPO_LICENSE_ERRORS = 5; const MAX_LICENSE_ID_LENGTH = 100; export const JAVA_CMD = getJavaCommand(); export function getJavaCommand() { let javaCmd = "java"; if (process.env.JAVA_CMD) { javaCmd = process.env.JAVA_CMD; } else if ( process.env.JAVA_HOME && safeExistsSync(process.env.JAVA_HOME) && safeExistsSync(join(process.env.JAVA_HOME, "bin", "java")) ) { javaCmd = join(process.env.JAVA_HOME, "bin", "java"); } return javaCmd; } export const PYTHON_CMD = getPythonCommand(); export function getPythonCommand() { let pythonCmd = "python"; if (process.env.PYTHON_CMD) { pythonCmd = process.env.PYTHON_CMD; } else if (process.env.CONDA_PYTHON_EXE) { pythonCmd = process.env.CONDA_PYTHON_EXE; } return pythonCmd; } export let DOTNET_CMD = "dotnet"; if (process.env.DOTNET_CMD) { DOTNET_CMD = process.env.DOTNET_CMD; } export let NODE_CMD = "node"; if (process.env.NODE_CMD) { NODE_CMD = process.env.NODE_CMD; } export let NPM_CMD = "npm"; if (process.env.NPM_CMD) { NPM_CMD = process.env.NPM_CMD; } export let YARN_CMD = "yarn"; if (process.env.YARN_CMD) { YARN_CMD = process.env.YARN_CMD; } export let 
GCC_CMD = "gcc"; if (process.env.GCC_CMD) { GCC_CMD = process.env.GCC_CMD; } export let RUSTC_CMD = "rustc"; if (process.env.RUSTC_CMD) { RUSTC_CMD = process.env.RUSTC_CMD; } export let GO_CMD = "go"; if (process.env.GO_CMD) { GO_CMD = process.env.GO_CMD; } export let CARGO_CMD = "cargo"; if (process.env.CARGO_CMD) { CARGO_CMD = process.env.CARGO_CMD; } // Clojure CLI export let CLJ_CMD = "clj"; if (process.env.CLJ_CMD) { CLJ_CMD = process.env.CLJ_CMD; } export let LEIN_CMD = "lein"; if (process.env.LEIN_CMD) { LEIN_CMD = process.env.LEIN_CMD; } export let CDXGEN_TEMP_DIR = "temp"; if (process.env.CDXGEN_TEMP_DIR) { CDXGEN_TEMP_DIR = process.env.CDXGEN_TEMP_DIR; } // On a mac, use xcrun // xcrun: Find and execute the named command line tool from the active developer directory export const SWIFT_CMD = process.env.SWIFT_CMD || isMac ? "xcrun swift" : "swift"; export const RUBY_CMD = process.env.RUBY_CMD || "ruby"; // Python components that can be excluded export const PYTHON_EXCLUDED_COMPONENTS = [ "pip", "setuptools", "wheel", "conda", "conda-build", "conda-index", "conda-libmamba-solver", "conda-package-handling", "conda-package-streaming", "conda-content-trust", ]; // Project type aliases export const PROJECT_TYPE_ALIASES = { java: [ "java", "java8", "java11", "java17", "java21", "java22", "java23", "java24", "groovy", "kotlin", "kt", "scala", "jvm", "gradle", "mvn", "maven", "sbt", "bazel", "quarkus", ], android: ["android", "apk", "aab"], jar: ["jar", "war", "ear"], "gradle-index": ["gradle-index", "gradle-cache"], "sbt-index": ["sbt-index", "sbt-cache"], "maven-index": ["maven-index", "maven-cache", "maven-core"], js: [ "npm", "pnpm", "nodejs", "nodejs8", "nodejs10", "nodejs12", "nodejs14", "nodejs16", "nodejs18", "nodejs20", "nodejs22", "nodejs23", "node", "node8", "node10", "node12", "node14", "node16", "node18", "node20", "node22", "node23", "js", "javascript", "typescript", "ts", "tsx", "vsix", "yarn", "rush", ], py: [ "py", "python", "pypi", "python36", 
"python38", "python39", "python310", "python311", "python312", "python313", "pixi", "pip", "poetry", "uv", "pdm", "hatch", ], go: ["go", "golang", "gomod", "gopkg"], rust: ["rust", "rust-lang", "cargo"], php: ["php", "composer", "wordpress"], ruby: ["ruby", "gems", "rubygems", "bundler", "rb", "gemspec"], csharp: [ "csharp", "netcore", "netcore2.1", "netcore3.1", "dotnet", "dotnet6", "dotnet7", "dotnet8", "dotnet9", "dotnet-framework", "dotnet-framework47", "dotnet-framework48", "vb", "fsharp", "twincat", "csproj", "tsproj", "vbproj", "sln", "fsproj", "plcproj", "hmiproj", ], dart: ["dart", "flutter", "pub"], haskell: ["haskell", "hackage", "cabal"], elixir: ["elixir", "hex", "mix"], c: ["c", "cpp", "c++", "conan"], clojure: ["clojure", "edn", "clj", "leiningen"], github: ["github", "actions"], os: ["os", "osquery", "windows", "linux", "mac", "macos", "darwin"], jenkins: ["jenkins", "hpi"], helm: ["helm", "charts"], "helm-index": ["helm-index", "helm-repo"], universal: [ "universal", "containerfile", "docker-compose", "dockerfile", "swarm", "tekton", "kustomize", "operator", "skaffold", "kubernetes", "openshift", "yaml-manifest", ], cloudbuild: ["cloudbuild"], swift: [ "swift", "ios", "macos", "swiftpm", "ipados", "tvos", "watchos", "visionos", ], binary: ["binary", "blint"], oci: ["docker", "oci", "container", "podman"], cocoa: ["cocoa", "cocoapods", "objective-c", "swift", "ios"], scala: ["scala", "scala3", "sbt", "mill"], }; // Package manager aliases export const PACKAGE_MANAGER_ALIASES = { scala: ["sbt"], }; /** * Method to check if a given feature flag is enabled. 
* * @param {Object} cliOptions CLI options * @param {String} feature Feature flag * * @returns {Boolean} True if the feature is enabled */ export function isFeatureEnabled(cliOptions, feature) { if (cliOptions?.featureFlags?.includes(feature)) { return true; } if ( process.env[feature.toUpperCase()] && ["true", "1"].includes(process.env[feature.toUpperCase()]) ) { return true; } // Retry by replacing hyphens with underscore return !!( process.env[feature.replaceAll("-", "_").toUpperCase()] && ["true", "1"].includes( process.env[feature.replaceAll("-", "_").toUpperCase()], ) ); } /** * Method to check if the given project types are allowed by checking against include and exclude types passed from the CLI arguments. * * @param {Array} projectTypes project types to check * @param {Object} options CLI options * @param {Boolean} defaultStatus Default return value if there are no types provided */ export function hasAnyProjectType(projectTypes, options, defaultStatus = true) { // If no project type is specified, then consider it as yes if ( !projectTypes || (!options.projectType?.length && !options.excludeType?.length) ) { return defaultStatus; } // Convert string project types to an array if ( projectTypes && (typeof projectTypes === "string" || projectTypes instanceof String) ) { projectTypes = projectTypes.split(","); } // If only exclude type is specified, then do not allow oci type if ( (projectTypes?.length === 1 || !defaultStatus) && !options.projectType?.length && options.excludeType?.length ) { return ( !projectTypes.includes("oci") && !projectTypes.includes("oci-dir") && !projectTypes.includes("os") && !projectTypes.includes("docker") && !options.excludeType.includes("oci") ); } const allProjectTypes = [...projectTypes]; // Convert the project types into base types const baseProjectTypes = []; // Support for arbitray versioned ruby type if (projectTypes.filter((p) => p.startsWith("ruby")).length) { baseProjectTypes.push("ruby"); } const baseExcludeTypes = []; 
for (const abt of Object.keys(PROJECT_TYPE_ALIASES)) { if ( PROJECT_TYPE_ALIASES[abt].filter((pt) => new Set(options?.projectType).has(pt), ).length ) { baseProjectTypes.push(abt); } if ( PROJECT_TYPE_ALIASES[abt].filter((pt) => new Set(projectTypes).has(pt)) .length ) { allProjectTypes.push(abt); } if ( PROJECT_TYPE_ALIASES[abt].filter((pt) => new Set(options?.excludeType).has(pt), ).length ) { baseExcludeTypes.push(abt); } } const shouldInclude = !options.projectType?.length || options.projectType?.includes("universal") || options.projectType?.filter((pt) => new Set(allProjectTypes).has(pt)) .length > 0 || baseProjectTypes.filter((pt) => new Set(allProjectTypes).has(pt)).length > 0; if (shouldInclude && options.excludeType) { return ( !baseExcludeTypes.filter((pt) => pt && new Set(baseProjectTypes).has(pt)) .length && !baseExcludeTypes.filter((pt) => pt && new Set(allProjectTypes).has(pt)) .length ); } return shouldInclude; } /** * Convenient method to check if the given package manager is allowed. * * @param {String} name Package manager name * @param {Array} conflictingManagers List of package managers * @param {Object} options CLI options * * @returns {Boolean} True if the package manager is allowed */ export function isPackageManagerAllowed(name, conflictingManagers, options) { for (const apm of conflictingManagers) { if (options?.projectType?.includes(apm)) { return false; } } const res = !options.excludeType?.filter( (p) => p === name || PACKAGE_MANAGER_ALIASES[p]?.includes(name), ).length; if (res) { thoughtLog( `**PACKAGE MANAGER**: Let's make use of the package manager '${name}', which is allowed.`, ); } return res; } // HTTP cache const gotHttpCache = new Map(); function isCacheDisabled() { return ( process.env.CDXGEN_NO_CACHE && ["true", "1"].includes(process.env.CDXGEN_NO_CACHE) ); } const cache = isCacheDisabled() ? 
undefined : gotHttpCache; // Custom user-agent for cdxgen export const cdxgenAgent = got.extend({ headers: { "user-agent": `@CycloneDX/cdxgen ${_version}`, }, cache, retry: { limit: 0, }, }); /** * Method to get files matching a pattern * * @param {string} dirPath Root directory for search * @param {string} pattern Glob pattern (eg: *.gradle) * @param {Object} options CLI options */ export function getAllFiles(dirPath, pattern, options = {}) { let ignoreList = [ "**/.hg/**", "**/.git/**", "**/venv/**", "**/examples/**", "**/site-packages/**", "**/flow-typed/**", "**/coverage/**", ]; // Only ignore node_modules if the caller is not looking for package.json if (!pattern.includes("package.json")) { ignoreList.push("**/node_modules/**"); } // ignore docs only for non-lock file lookups if ( !pattern.includes("package.json") && !pattern.includes("package-lock.json") && !pattern.includes("yarn.lock") && !pattern.includes("pnpm-lock.yaml") ) { ignoreList.push("**/docs/**"); } if (options?.exclude && Array.isArray(options.exclude)) { ignoreList = ignoreList.concat(options.exclude); } return getAllFilesWithIgnore(dirPath, pattern, ignoreList); } /** * Method to get files matching a pattern * * @param {string} dirPath Root directory for search * @param {string} pattern Glob pattern (eg: *.gradle) * @param {Array} ignoreList Directory patterns to ignore */ export function getAllFilesWithIgnore(dirPath, pattern, ignoreList) { try { const files = globSync(pattern, { cwd: dirPath, absolute: true, nocase: true, nodir: true, dot: pattern.startsWith("."), follow: false, ignore: ignoreList, }); if (files.length > 1) { thoughtLog( `Found ${files.length} files for the pattern '${pattern}' at '${dirPath}'.`, ); } return files; } catch (err) { if (DEBUG_MODE) { console.error(err); } return []; } } /** * Method to encode hex string to base64 string * * @param {string} hexString hex string * @returns {string} base64 encoded string */ function toBase64(hexString) { return 
Buffer.from(hexString, "hex").toString("base64"); } /** * Return the current timestamp in YYYY-MM-DDTHH:MM:SSZ format. * * @returns {string} ISO formatted timestamp, without milliseconds. */ export function getTimestamp() { return `${new Date().toISOString().split(".")[0]}Z`; } export function getTmpDir() { if ( process.env.CDXGEN_TEMP_DIR && !safeExistsSync(process.env.CDXGEN_TEMP_DIR) ) { safeMkdirSync(process.env.CDXGEN_TEMP_DIR, { recursive: true }); } return process.env.CDXGEN_TEMP_DIR || tmpdir(); } /** * Method to determine if a license is a valid SPDX license expression * * @param {string} license License string * @returns {boolean} true if the license is a valid SPDX license expression * @see https://spdx.dev/learn/handling-license-info/ **/ export function isSpdxLicenseExpression(license) { if (!license) { return false; } if (/[(\s]+/g.test(license)) { return true; } return !!license.endsWith("+"); } /** * Convert the array of licenses to a CycloneDX 1.5 compliant license array. * This should return an array containing: * - one or more SPDX license if no expression is present * - the license of the expression if one expression is present * - a unified conditional 'OR' license expression if more than one expression is present * * @param {Array} licenses Array of licenses * @returns {Array} CycloneDX 1.5 compliant license array */ export function adjustLicenseInformation(licenses) { if (!licenses || !Array.isArray(licenses)) { return []; } const expressions = licenses.filter((f) => { return f.expression; }); if (expressions.length >= 1) { if (expressions.length > 1) { return [ { expression: expressions .map((e) => e.expression || "") .filter(Boolean) .join(" OR "), }, ]; } return [{ expression: expressions[0].expression }]; } return licenses.map((l) => { if (typeof l.license === "object") { return l; } return { license: l }; }); } /** * Performs a lookup + validation of the license specified in the * package. 
If the license is a valid SPDX license ID, set the 'id' * and url of the license object, otherwise, set the 'name' of the license * object. */ export function getLicenses(pkg) { let license = pkg.license && (pkg.license.type || pkg.license); if (license) { if (!Array.isArray(license)) { license = [license]; } return adjustLicenseInformation( license .filter((l) => l !== undefined) .map((l) => { let licenseContent = {}; if (typeof l === "string" || l instanceof String) { if ( spdxLicenses.some((v) => { return l === v; }) ) { licenseContent.id = l; licenseContent.url = `https://opensource.org/licenses/${l}`; } else if (l.startsWith("http")) { const knownLicense = getKnownLicense(l, pkg); if (knownLicense) { licenseContent.id = knownLicense.id; licenseContent.name = knownLicense.name; } // We always need a name to avoid validation errors // Issue: #469 if (!licenseContent.name && !licenseContent.id) { licenseContent.name = "CUSTOM"; } licenseContent.url = l; } else if (isSpdxLicenseExpression(l)) { licenseContent.expression = l; } else { licenseContent.name = l; } } else if (Object.keys(l).length) { licenseContent = l; } else { return undefined; } if (!licenseContent.id) { addLicenseText(pkg, l, licenseContent); } return licenseContent; }), ); } const knownLicense = getKnownLicense(undefined, pkg); if (knownLicense) { return [{ license: knownLicense }]; } return undefined; } /** * Method to retrieve known license by known-licenses.json * * @param {String} licenseUrl Repository url * @param {String} pkg Bom ref * @return {Object} Objetct with SPDX license id or license name */ export function getKnownLicense(licenseUrl, pkg) { if (licenseUrl?.includes("opensource.org")) { const possibleId = licenseUrl .toLowerCase() .replace("https://", "http://") .replace("http://www.opensource.org/licenses/", ""); for (const spdxLicense of spdxLicenses) { if (spdxLicense.toLowerCase() === possibleId) { return { id: spdxLicense }; } } } else if (licenseUrl?.includes("apache.org")) { 
const possibleId = licenseUrl .toLowerCase() .replace("https://", "http://") .replace("http://www.apache.org/licenses/license-", "apache-") .replace(".txt", ""); for (const spdxLicense of spdxLicenses) { if (spdxLicense.toLowerCase() === possibleId) { return { id: spdxLicense }; } } } for (const akLicGroup of knownLicenses) { if ( akLicGroup.packageNamespace === "*" || pkg.purl?.startsWith(akLicGroup.packageNamespace) ) { for (const akLic of akLicGroup.knownLicenses) { if (akLic.group && akLic.name) { if (akLic.group === "." && akLic.name === pkg.name) { return { id: akLic.license, name: akLic.licenseName }; } if ( pkg.group?.includes(akLic.group) && (akLic.name === pkg.name || akLic.name === "*") ) { return { id: akLic.license, name: akLic.licenseName }; } } if ( akLic.urlIncludes && licenseUrl && licenseUrl.includes(akLic.urlIncludes) ) { return { id: akLic.license, name: akLic.licenseName }; } if ( akLic.urlEndswith && licenseUrl && licenseUrl.endsWith(akLic.urlEndswith) ) { return { id: akLic.license, name: akLic.licenseName }; } } } } return undefined; } /** * Tries to find a file containing the license text based on commonly * used naming and content types. If a candidate file is found, add * the text to the license text object and stop. */ export function addLicenseText(pkg, l, licenseContent) { const licenseFilenames = [ "LICENSE", "License", "license", "LICENCE", "Licence", "licence", "NOTICE", "Notice", "notice", ]; const licenseContentTypes = { "text/plain": "", "text/txt": ".txt", "text/markdown": ".md", "text/xml": ".xml", }; /* Loops over different name combinations starting from the license specified naming (e.g., 'LICENSE.Apache-2.0') and proceeding towards more generic names. 
*/ for (const licenseName of [`.${l}`, ""]) { for (const licenseFilename of licenseFilenames) { for (const [licenseContentType, fileExtension] of Object.entries( licenseContentTypes, )) { const licenseFilepath = `${pkg.realPath}/${licenseFilename}${licenseName}${fileExtension}`; if (safeExistsSync(licenseFilepath)) { licenseContent.text = readLicenseText( licenseFilepath, licenseContentType, ); return; } } } } } /** * Read the file from the given path to the license text object and includes * content-type attribute, if not default. Returns the license text object. */ export function readLicenseText(licenseFilepath, licenseContentType) { const licenseText = readFileSync(licenseFilepath, "utf8"); if (licenseText) { const licenseContentText = { content: licenseText }; if (licenseContentType !== "text/plain") { licenseContentText["contentType"] = licenseContentType; } return licenseContentText; } return null; } export async function getSwiftPackageMetadata(pkgList) { const cdepList = []; for (const p of pkgList) { if (p.repository?.url) { if (p.repository.url.includes("://github.com/")) { try { p.license = await getRepoLicense(p.repository.url, undefined); } catch (e) { console.error("error fetching repo license from", p.repository.url); } } else { if (DEBUG_MODE) { console.log( p.repository.url, "is currently not supported to fetch for licenses", ); } } } else { if (DEBUG_MODE) { console.warn("no repository url found for", p.name); } } cdepList.push(p); } return cdepList; } /** * Method to retrieve metadata for npm packages by querying npmjs * * @param {Array} pkgList Package list */ export async function getNpmMetadata(pkgList) { const NPM_URL = process.env.NPM_URL || "https://registry.npmjs.org/"; const cdepList = []; for (const p of pkgList) { try { let key = p.name; if (p.group && p.group !== "") { let group = p.group; if (!group.startsWith("@")) { group = `@${group}`; } key = `${group}/${p.name}`; } let body = {}; if (metadata_cache[key]) { body = 
metadata_cache[key]; } else { const res = await cdxgenAgent.get(NPM_URL + key, { responseType: "json", }); body = res.body; metadata_cache[key] = body; } p.description = body.versions?.[p.version]?.description || body.description; p.license = body.versions?.[p.version]?.license || body.license || (await getRepoLicense(body.repository?.url, undefined)); if (body.repository?.url) { p.repository = { url: body.repository.url }; } if (body.homepage) { p.homepage = { url: body.homepage }; } cdepList.push(p); } catch (err) { cdepList.push(p); if (DEBUG_MODE) { console.error(p, "was not found on npm"); } } } return cdepList; } /** * Parse nodejs package json file * * @param {string} pkgJsonFile package.json file * @param {boolean} simple Return a simpler representation of the component by skipping extended attributes and license fetch. */ export async function parsePkgJson(pkgJsonFile, simple = false) { const pkgList = []; if (safeExistsSync(pkgJsonFile)) { try { const pkgData = JSON.parse(readFileSync(pkgJsonFile, "utf8")); const pkgIdentifier = parsePackageJsonName(pkgData.name); let name = pkgIdentifier.fullName || pkgData.name; if (!name && !pkgJsonFile.includes("node_modules")) { name = basename(dirname(pkgJsonFile)); } const group = pkgIdentifier.scope || ""; const purl = new PackageURL( "npm", group, name, pkgData.version, null, null, ).toString(); const author = pkgData.author; const authorString = author instanceof Object ? `${author.name}${author.email ? ` <${author.email}>` : ""}${ author.url ? 
` (${author.url})` : "" }` : author; const apkg = { name, group, version: pkgData.version, description: pkgData.description, purl: purl, "bom-ref": decodeURIComponent(purl), author: authorString, license: pkgData.license, }; if (pkgData.homepage) { apkg.homepage = { url: pkgData.homepage }; } if (pkgData.repository?.url) { apkg.repository = { url: pkgData.repository.url }; } if (!simple) { apkg.properties = [ { name: "SrcFile", value: pkgJsonFile, }, ]; apkg.evidence = { identity: { field: "purl", confidence: 0.7, methods: [ { technique: "manifest-analysis", confidence: 0.7, value: pkgJsonFile, }, ], }, }; } pkgList.push(apkg); } catch (err) { // continue regardless of error } } if (!simple && shouldFetchLicense() && pkgList && pkgList.length) { if (DEBUG_MODE) { console.log( `About to fetch license information for ${pkgList.length} packages in parsePkgJson`, ); } return await getNpmMetadata(pkgList); } return pkgList; } /** * Parse nodejs package lock file * * @param {string} pkgLockFile package-lock.json file * @param {object} options Command line options */ export async function parsePkgLock(pkgLockFile, options = {}) { let pkgList = []; let dependenciesList = []; if (!options) { options = {}; } const pkgSpecVersionCache = {}; if (!safeExistsSync(pkgLockFile)) { return { pkgList, dependenciesList, }; } const parseArboristNode = ( node, rootNode, parentRef = null, visited = new Set(), pkgSpecVersionCache = {}, options = {}, ) => { if (visited.has(node)) { return { pkgList: [], dependenciesList: [] }; } visited.add(node); let pkgList = []; let dependenciesList = []; // Create the package entry const srcFilePath = node.path.includes(`${_sep}node_modules`) ? node.path.split(`${_sep}node_modules`)[0] : node.path; const scope = node.dev === true ? "optional" : undefined; const integrity = node.integrity ? node.integrity : undefined; let pkg; let purlString; const author = node.package.author; const authorString = author instanceof Object ? 
`${author.name}${author.email ? ` <${author.email}>` : ""}${ author.url ? ` (${author.url})` : "" }` : author; if (node === rootNode) { purlString = new PackageURL( "npm", options.projectGroup || "", "project-name" in options ? options.projectName : node.packageName, options.projectVersion || node.version, null, null, ) .toString() .replace(/%2F/g, "/"); pkg = { author: authorString, group: options.projectGroup || "", name: "project-name" in options ? options.projectName : node.packageName, version: options.projectVersion || node.version, type: "application", purl: purlString, "bom-ref": decodeURIComponent(purlString), }; } else { purlString = new PackageURL( "npm", "", node.packageName, node.version, null, null, ) .toString() .replace(/%2F/g, "/"); const pkgLockFile = join( srcFilePath.replace("/", _sep), "package-lock.json", ); pkg = { group: "", name: node.packageName, version: node.version, author: authorString, scope: scope, _integrity: integrity, externalReferences: [], properties: [ { name: "SrcFile", value: pkgLockFile, }, ], evidence: { identity: { field: "purl", confidence: 1, methods: [ { technique: "manifest-analysis", confidence: 1, value: pkgLockFile, }, ], }, }, type: parentRef ? "npm" : "application", purl: purlString, "bom-ref": decodeURIComponent(purlString), }; if (node.resolved) { if (node.resolved.startsWith("file:")) { pkg.properties.push({ name: "cdx:npm:resolvedPath", value: node.realpath ? 
relative(dirname(pkgLockFile), node.realpath) : relative( dirname(pkgLockFile), resolve(node.resolved.replace("file:", "")), ), }); } else { pkg.properties.push({ name: "ResolvedUrl", value: node.resolved, }); pkg.distribution = { url: node.resolved }; } } if (node.location) { pkg.properties.push({ name: "LocalNodeModulesPath", value: node.location, }); } if (node?.installLinks) { pkg.properties.push({ name: "cdx:npm:installLinks", value: "true", }); } if (node?.binPaths?.length) { pkg.properties.push({ name: "cdx:npm:binPaths", value: node.binPaths.join(", "), }); } if (node?.hasInstallScript) { pkg.properties.push({ name: "cdx:npm:hasInstallScript", value: "true", }); } if (node?.isLink) { pkg.properties.push({ name: "cdx:npm:isLink", value: "true", }); } // This getter method could fail with errors at times. // Example Error: Invalid tag name "^>=6.0.0" of package "^>=6.0.0": Tags may not have any characters that encodeURIComponent encodes. try { if (!node?.isRegistryDependency) { pkg.properties.push({ name: "cdx:npm:isRegistryDependency", value: "false", }); } } catch (err) { // ignore } if (node?.isWorkspace) { pkg.properties.push({ name: "cdx:npm:isWorkspace", value: "true", }); } if (node?.inBundle) { pkg.properties.push({ name: "cdx:npm:inBundle", value: "true", }); } if (node?.inDepBundle) { pkg.properties.push({ name: "cdx:npm:inDepBundle", value: "true", }); } if (node.package?.repository?.url) { pkg.externalReferences.push({ type: "vcs", url: node.package.repository.url, }); } if (node.package?.bugs?.url) { pkg.externalReferences.push({ type: "issue-tracker", url: node.package.bugs.url, }); } if (node?.package?.keywords?.length) { pkg.tags = Array.isArray(node.package.keywords) ? 
node.package.keywords.sort() : node.package.keywords.split(","); } } if (node.package?.license) { // License will be overridden if shouldFetchLicense() is enabled pkg.license = node.package.license; } const deprecatedMessage = node.package?.deprecated; if (deprecatedMessage) { pkg.properties.push({ name: "cdx:npm:deprecated", value: deprecatedMessage, }); } pkgList.push(pkg); // retrieve workspace node pkglists const workspaceDependsOn = []; if (node.fsChildren && node.fsChildren.size > 0) { for (const workspaceNode of node.fsChildren) { const { pkgList: childPkgList, dependenciesList: childDependenciesList, } = parseArboristNode( workspaceNode, rootNode, purlString, visited, pkgSpecVersionCache, options, ); pkgList = pkgList.concat(childPkgList); dependenciesList = dependenciesList.concat(childDependenciesList); let depWorkspacePurlString = decodeURIComponent( new PackageURL( "npm", "", workspaceNode.name, workspaceNode.version, null, null, ) .toString() .replace(/%2F/g, "/"), ); let purlStringFromPkgid; if (workspaceNode.pkgid) { purlStringFromPkgid = `pkg:npm/${workspaceNode.pkgid.replace(`${workspaceNode.name}@npm:`, "")}`; } if ( purlStringFromPkgid && purlStringFromPkgid !== depWorkspacePurlString ) { if (DEBUG_MODE) { console.log( `Internal warning: Got two different refs for this workspace node: ${depWorkspacePurlString} and ${purlStringFromPkgid}. Assuming the bom-ref as ${purlStringFromPkgid} based on pkgid.`, ); } depWorkspacePurlString = purlStringFromPkgid; } if (decodeURIComponent(purlString) !== depWorkspacePurlString) { workspaceDependsOn.push(depWorkspacePurlString); } } } // this handles the case when a node has ["dependencies"] key in a package-lock.json // for a node. 
We exclude the root node because it's already been handled // If the node has "requires", we don't have to track the "dependencies" const childrenDependsOn = []; if (node !== rootNode && !node.edgesOut.size) { for (const child of node.children) { const childNode = child[1]; const { pkgList: childPkgList, dependenciesList: childDependenciesList, } = parseArboristNode( childNode, rootNode, decodeURIComponent(purlString), visited, pkgSpecVersionCache, options, ); pkgList = pkgList.concat(childPkgList); dependenciesList = dependenciesList.concat(childDependenciesList); const depChildString = decodeURIComponent( new PackageURL( "npm", "", childNode.name, childNode.version, null, null, ) .toString() .replace(/%2F/g, "/"), ); if (decodeURIComponent(purlString) !== depChildString) { childrenDependsOn.push(depChildString); } } } // this handles the case when a node has a ["requires"] key const pkgDependsOn = []; for (const edge of node.edgesOut.values()) { let targetVersion; let targetName; let foundMatch = false; // This cache is required to help us down the line. if (edge?.to?.version && edge?.spec) { pkgSpecVersionCache[`${edge.name}-${edge.spec}`] = edge.to.version; } // if the edge doesn't have an integrity, it's likely a peer dependency // which isn't installed // Bug #795. At times, npm loses the integrity node completely and such packages are getting missed out // To keep things safe, we include these packages. let edgeToIntegrityOrLocation = edge.to ? edge.to.integrity : undefined; // Fallback to location based lookups when integrity is missing if (!edgeToIntegrityOrLocation && edge.to && edge.to.location) { edgeToIntegrityOrLocation = edge.to.location; } if (!edgeToIntegrityOrLocation) { // This hack is required to fix the package name targetName = edge.name.replace(/-cjs$/, ""); foundMatch = false; } else { // the edges don't actually contain a version, so we need to search the root node // children to find the correct version. 
we check the node children first, then // we check the root node children for (const child of node.children) { if (edgeToIntegrityOrLocation) { if ( child[1].integrity === edgeToIntegrityOrLocation || child[1].location === edgeToIntegrityOrLocation ) { targetName = child[0].replace(/node_modules\//g, ""); // The package name could be different from the targetName retrieved // Eg: "string-width-cjs": "npm:string-width@^4.2.0", if (child[1].packageName && child[1].packageName !== targetName) { targetName = child[1].packageName; } targetVersion = child[1].version; foundMatch = true; break; } } } } if (!foundMatch) { for (const child of rootNode.children) { if ( edgeToIntegrityOrLocation && (child[1].integrity === edgeToIntegrityOrLocation || child[1].location === edgeToIntegrityOrLocation) ) { targetName = child[0].replace(/node_modules\//g, ""); targetVersion = child[1].version; // The package name could be different from the targetName retrieved // "string-width-cjs": "npm:string-width@^4.2.0", if (child[1].packageName && child[1].packageName !== targetName) { targetName = child[1].packageName; } break; } } if (!targetVersion || !targetName) { if (pkgSpecVersionCache[`${edge.name}-${edge.spec}`]) { targetVersion = pkgSpecVersionCache[`${edge.name}-${edge.spec}`]; targetName = edge.name; } } } // if we can't find the version of the edge, continue // it may be an optional peer dependency if (!targetVersion || !targetName) { if ( DEBUG_MODE && !options.deep && !["optional", "peer", "peerOptional"].includes(edge?.type) ) { if (!targetVersion) { console.log( `Unable to determine the version for the dependency ${edge.name} from the path ${edge?.from?.path}. This is likely an edge case that is not handled.`, edge, ); } else if (!targetName) { console.log( `Unable to determine the name for the dependency from the edge from the path ${edge?.from?.path}. 
This is likely an edge case that is not handled.`, edge, ); } } // juice-shop // Lock files created with --legacy-peer-deps will have certain peer dependencies missing // This flags any non-missing peers if (DEBUG_MODE && edge?.type === "peer" && edge?.error !== "MISSING") { console.log( `Unable to determine the version for the dependency ${edge.name} from the path ${edge?.from?.path}. This is likely an edge case that is not handled.`, edge, ); } continue; } const depPurlString = decodeURIComponent( new PackageURL("npm", "", targetName, targetVersion, null, null) .toString() .replace(/%2F/g, "/"), ); if (decodeURIComponent(purlString) !== depPurlString) { pkgDependsOn.push(depPurlString); } if (edge.to == null) { continue; } const { pkgList: childPkgList, dependenciesList: childDependenciesList } = parseArboristNode( edge.to, rootNode, decodeURIComponent(purlString), visited, pkgSpecVersionCache, options, ); pkgList = pkgList.concat(childPkgList); dependenciesList = dependenciesList.concat(childDependenciesList); } dependenciesList.push({ ref: decodeURIComponent(purlString), dependsOn: [ ...new Set( workspaceDependsOn.concat(childrenDependsOn).concat(pkgDependsOn), ), ].sort(), }); return { pkgList, dependenciesList }; }; let arb = new Arborist({ path: path.dirname(pkgLockFile), // legacyPeerDeps=false enables npm >v3 package dependency resolution legacyPeerDeps: false, }); let tree = undefined; try { const rootNodeModulesDir = join(path.dirname(pkgLockFile), "node_modules"); if (safeExistsSync(rootNodeModulesDir)) { if (options.deep) { console.log( `Constructing the actual dependency hierarchy from ${rootNodeModulesDir}.`, ); tree = await arb.loadActual(); } else { if (DEBUG_MODE) { console.log( "Constructing virtual dependency tree based on the lock file. 
Pass --deep argument to construct the actual dependency tree from disk.", ); } tree = await arb.loadVirtual(); } } else { tree = await arb.loadVirtual(); } } catch (e) { console.log( `Unable to parse ${pkgLockFile} without legacy peer dependencies. Retrying ...`, ); if (DEBUG_MODE) { console.log(e); } try { arb = new Arborist({ path: path.dirname(pkgLockFile), legacyPeerDeps: true, }); tree = await arb.loadVirtual(); } catch (e) { console.log( `Unable to parse ${pkgLockFile} in legacy and non-legacy mode. The resulting SBOM would be incomplete.`, ); if (DEBUG_MODE) { console.log(e); } return { pkgList, dependenciesList }; } } if (!tree) { return { pkgList, dependenciesList }; } ({ pkgList, dependenciesList } = parseArboristNode( tree, tree, null, new Set(), pkgSpecVersionCache, options, )); if (shouldFetchLicense() && pkgList && pkgList.length) { if (DEBUG_MODE) { console.log( `About to fetch license information for ${pkgList.length} packages in parsePkgLock`, ); } pkgList = await getNpmMetadata(pkgList); return { pkgList, dependenciesList }; } return { pkgList, dependenciesList, }; } /** * Given a lock file this method would return an Object with the identity as the key and parsed name and value * eg: "@actions/core@^1.2.6", "@actions/core@^1.6.0": * version "1.6.0" * would result in two entries * * @param {string} lockData Yarn Lockfile data */ export function yarnLockToIdentMap(lockData) { const identMap = {}; let currentIdents = []; lockData.split("\n").forEach((l) => { l = l.replace("\r", ""); if (l === "\n" || !l.length || l.startsWith("#")) { return; } // "@actions/core@^1.2.6", "@actions/core@^1.6.0": if (!l.startsWith(" ") && l.trim().length > 0) { const tmpA = l.replace(/["']/g, "").split(", "); if (tmpA?.length) { for (let s of tmpA) { if (!s.startsWith("__")) { if (s.endsWith(":")) { s = s.substring(0, s.length - 1); } // Non-strict mode parsing const match = s.match(/^(?:(@[^/]+?)\/)?([^/]+?)(?:@(.+))?$/); if (!match) { continue; } let [, group, name, 
range] = match; if (group) { group = `${group}/`; } // "lru-cache@npm:^6.0.0": // "string-width-cjs@npm:string-width@^4.2.0": // Here range can be // - npm:^6.0.0 // - npm:@types/ioredis@^4.28.10 // - npm:strip-ansi@^6.0.1 // See test cases with yarn3.lock and yarn6.lock if (range?.startsWith("npm:")) { if (range.includes("@")) { range = range.split("@").slice(-1)[0]; } else { range = range.replace("npm:", ""); } } currentIdents.push(`${group || ""}${name}|${range}`); } } } } else if ( (l.startsWith(" version") || l.startsWith(' "version')) && currentIdents.length ) { const tmpA = l.replace(/"/g, "").split(" "); const version = tmpA[tmpA.length - 1].trim(); for (const id of currentIdents) { ident