/*
 * @cyclonedx/cdxgen
 * Version: (not stated)
 * Creates CycloneDX Software Bill of Materials (SBOM) from source or container image
 * 1,729 lines (1,660 loc) • 467 kB
 * JavaScript
 */
import { Buffer } from "node:buffer";
import { spawnSync } from "node:child_process";
import { createHash, randomUUID } from "node:crypto";
import {
constants,
chmodSync,
copyFileSync,
createReadStream,
existsSync,
lstatSync,
mkdirSync,
mkdtempSync,
readFileSync,
rmSync,
unlinkSync,
writeFileSync,
} from "node:fs";
import { homedir, platform, tmpdir } from "node:os";
import path, {
basename,
delimiter as _delimiter,
dirname,
extname,
join,
resolve,
relative,
sep as _sep,
} from "node:path";
import process from "node:process";
import { URL, fileURLToPath } from "node:url";
import toml from "@iarna/toml";
import Arborist from "@npmcli/arborist";
import { load } from "cheerio";
import { parseEDNString } from "edn-data";
import { globSync } from "glob";
import got from "got";
import iconv from "iconv-lite";
import { load as _load } from "js-yaml";
import StreamZip from "node-stream-zip";
import { PackageURL } from "packageurl-js";
import propertiesReader from "properties-reader";
import {
clean,
coerce,
compare,
maxSatisfying,
parse,
satisfies,
valid,
} from "semver";
import { IriValidationStrategy, validateIri } from "validate-iri";
import { xml2js } from "xml-js";
import { getTreeWithPlugin } from "../managers/piptree.js";
import { thoughtLog } from "./logger.js";
// fileURLToPath needs a file:// URL, so add the scheme when the module URL lacks it.
let url = import.meta.url.startsWith("file://")
  ? import.meta.url
  : new URL(`file://${import.meta.url}`).toString();
// TODO: verify if this is a good method (Prabhu)
// The first dirname() yields the directory of this module ("cdxgen/lib/helpers"
// per the original note); the next two climb two more levels so that the
// "data" directory and package.json can be resolved relative to dirNameStr.
export const dirNameStr = import.meta
  ? dirname(dirname(dirname(fileURLToPath(url))))
  : __dirname;
// Secure mode is requested explicitly through CDXGEN_SECURE_MODE ("true" or "1")
// or implied when NODE_OPTIONS mentions "--permission".
// NOTE: the value is undefined (not false) when neither variable is set.
export const isSecureMode =
  ["true", "1"].includes(process.env?.CDXGEN_SECURE_MODE) ||
  process.env?.NODE_OPTIONS?.includes("--permission");
// Platform flags derived from os.platform()
export const isWin = platform() === "win32";
export const isMac = platform() === "darwin";
// Per-platform location of the ".atomdb" directory under the user's home directory
export let ATOM_DB;
if (isWin) {
  ATOM_DB = join(homedir(), "AppData", "Local", ".atomdb");
} else if (isMac) {
  ATOM_DB = join(homedir(), "Library", "Application Support", ".atomdb");
} else {
  ATOM_DB = join(homedir(), ".local", "share", ".atomdb");
}
/**
 * Check whether a path exists without crashing when the process lacks
 * read permission for it.
 *
 * When secure mode is active and the Node.js permission API
 * (process.permission) is available, the path is reported as missing
 * unless "fs.read" is granted for it.
 *
 * @param {String} filePath File path
 * @returns {Boolean} True if the path exists and may be read. False otherwise
 */
export function safeExistsSync(filePath) {
  const readDenied =
    isSecureMode &&
    process.permission &&
    !process.permission.has("fs.read", join(filePath, "", "*"));
  if (!readDenied) {
    return existsSync(filePath);
  }
  if (DEBUG_MODE) {
    console.log(`cdxgen lacks read permission for: ${filePath}`);
  }
  return false;
}
/**
 * Create a directory without crashing when the process lacks write
 * permission for it.
 *
 * When secure mode is active and the Node.js permission API
 * (process.permission) is available, nothing is created unless "fs.write"
 * is granted for the path.
 *
 * @param {String} filePath Directory path to create
 * @param {Object} options Options forwarded unchanged to mkdirSync
 * @returns {String|undefined} Whatever mkdirSync returns, or undefined when write permission is missing
 */
export function safeMkdirSync(filePath, options) {
  const writeDenied =
    isSecureMode &&
    process.permission &&
    !process.permission.has("fs.write", join(filePath, "", "*"));
  if (!writeDenied) {
    return mkdirSync(filePath, options);
  }
  if (DEBUG_MODE) {
    console.log(`cdxgen lacks write permission for: ${filePath}`);
  }
  return undefined;
}
// Static lookup tables. Each constant below is read synchronously and parsed
// once, at module load, from a JSON file resolved relative to dirNameStr.
// A missing or malformed file therefore makes the module import itself throw.
// Parsed contents of data/lic-mapping.json
const licenseMapping = JSON.parse(
readFileSync(join(dirNameStr, "data", "lic-mapping.json"), "utf-8"),
);
// Parsed contents of data/vendor-alias.json
const vendorAliases = JSON.parse(
readFileSync(join(dirNameStr, "data", "vendor-alias.json"), "utf-8"),
);
// Parsed contents of data/spdx-licenses.json; used in this file as a list of SPDX license id strings
const spdxLicenses = JSON.parse(
readFileSync(join(dirNameStr, "data", "spdx-licenses.json"), "utf-8"),
);
// Parsed contents of data/known-licenses.json; iterated by getKnownLicense as groups
// carrying a packageNamespace and a knownLicenses list
const knownLicenses = JSON.parse(
readFileSync(join(dirNameStr, "data", "known-licenses.json"), "utf-8"),
);
// Parsed contents of data/wrapdb-releases.json
const mesonWrapDB = JSON.parse(
readFileSync(join(dirNameStr, "data", "wrapdb-releases.json"), "utf-8"),
);
// Parsed contents of data/frameworks-list.json (exported)
export const frameworksList = JSON.parse(
readFileSync(join(dirNameStr, "data", "frameworks-list.json"), "utf-8"),
);
// cdxgen's own package.json; only its version is read in this section
const selfPJson = JSON.parse(
readFileSync(join(dirNameStr, "package.json"), "utf-8"),
);
// Parsed contents of data/glibc-stdlib.json
const CPP_STD_MODULES = JSON.parse(
readFileSync(join(dirNameStr, "data", "glibc-stdlib.json"), "utf-8"),
);
// Version string of this package, used below in the HTTP user-agent header
const _version = selfPJson.version;
// Refer to contrib/py-modules.py for a script to generate this list
// The script needs to be used once every few months to update this list
const PYTHON_STD_MODULES = JSON.parse(
readFileSync(join(dirNameStr, "data", "python-stdlib.json"), "utf-8"),
);
// Mapping between modules and package names
const PYPI_MODULE_PACKAGE_MAPPING = JSON.parse(
readFileSync(join(dirNameStr, "data", "pypi-pkg-aliases.json"), "utf-8"),
);
// FIXME. This has to get removed, once we improve the module detection one-liner.
// If you're a Rubyist, please help us improve this code.
const RUBY_KNOWN_MODULES = JSON.parse(
readFileSync(join(dirNameStr, "data", "ruby-known-modules.json"), "utf-8"),
);
// Debug mode flag: enabled by any one of three environment settings
export const DEBUG_MODE = [
  ["debug", "verbose"].includes(process.env.CDXGEN_DEBUG_MODE),
  process.env.SCAN_DEBUG_MODE === "debug",
  process.env.NODE_ENV === "development",
].some(Boolean);
// Timeout in milliseconds. Falls back to 20 minutes when CDXGEN_TIMEOUT_MS
// is unset, unparsable, or parses to 0.
export const TIMEOUT_MS =
  Number.parseInt(process.env.CDXGEN_TIMEOUT_MS) || 1_200_000;
// Max buffer for stdout and stderr. Falls back to 100MB when CDXGEN_MAX_BUFFER
// is unset, unparsable, or parses to 0.
export const MAX_BUFFER =
  Number.parseInt(process.env.CDXGEN_MAX_BUFFER) || 100 * 2 ** 20;
// Metadata cache
export let metadata_cache = {};
// Speed up lookup namespaces for a given jar
const jarNSMapping_cache = {};
// Temporary files written by cdxgen, will be removed on exit
const temporaryFiles = new Set();
process.on("exit", () =>
temporaryFiles.forEach((tempFile) => {
if (existsSync(tempFile)) {
unlinkSync(tempFile);
}
}),
);
// Whether test scope shall be included for java/maven projects.
// True when CDX_MAVEN_INCLUDE_TEST_SCOPE is unset, empty, "true" or "1".
export const includeMavenTestScope = [undefined, "", "true", "1"].includes(
  process.env.CDX_MAVEN_INCLUDE_TEST_SCOPE,
);
// Whether to use the native maven dependency tree command.
// True unless PREFER_MAVEN_DEPS_TREE is exactly "false" or "0".
export const PREFER_MAVEN_DEPS_TREE =
  process.env?.PREFER_MAVEN_DEPS_TREE !== "false" &&
  process.env?.PREFER_MAVEN_DEPS_TREE !== "0";
/**
 * Tell whether license fetching was requested through the FETCH_LICENSE
 * environment variable.
 *
 * @returns {Boolean|String|undefined} true for "true" or "1", false for any
 * other non-empty value, and the raw falsy value (undefined or "") when the
 * variable is unset or empty
 */
export function shouldFetchLicense() {
  const flag = process.env.FETCH_LICENSE;
  if (!flag) {
    return flag;
  }
  return ["true", "1"].includes(flag);
}
/**
 * Tell whether VCS lookups were requested through the GO_FETCH_VCS
 * environment variable.
 *
 * @returns {Boolean|String|undefined} true for "true" or "1", false for any
 * other non-empty value, and the raw falsy value (undefined or "") when the
 * variable is unset or empty
 */
export function shouldFetchVCS() {
  const flag = process.env.GO_FETCH_VCS;
  if (!flag) {
    return flag;
  }
  return ["true", "1"].includes(flag);
}
// Whether license information should be fetched (evaluated once at module load)
export const FETCH_LICENSE = shouldFetchLicense();
// Whether search.maven.org will be used to identify jars without maven metadata.
// True when SEARCH_MAVEN_ORG is unset, empty, "true" or "1".
export const SEARCH_MAVEN_ORG = [undefined, "", "true", "1"].includes(
  process.env.SEARCH_MAVEN_ORG,
);
// circuit breaker for search maven.org
let search_maven_org_errors = 0;
const MAX_SEARCH_MAVEN_ORG_ERRORS = 1;
// circuit breaker for get repo license
let get_repo_license_errors = 0;
const MAX_GET_REPO_LICENSE_ERRORS = 5;
const MAX_LICENSE_ID_LENGTH = 100;
// Java executable resolved once at module load; see getJavaCommand()
export const JAVA_CMD = getJavaCommand();
/**
 * Work out which java executable to invoke.
 *
 * Precedence: the JAVA_CMD environment variable, then "<JAVA_HOME>/bin/java"
 * when both JAVA_HOME and that file exist, and finally plain "java".
 *
 * @returns {String} Java command or path
 */
export function getJavaCommand() {
  const { JAVA_CMD: explicitCmd, JAVA_HOME: javaHome } = process.env;
  if (explicitCmd) {
    return explicitCmd;
  }
  if (javaHome && safeExistsSync(javaHome)) {
    const bundledJava = join(javaHome, "bin", "java");
    if (safeExistsSync(bundledJava)) {
      return bundledJava;
    }
  }
  return "java";
}
// Python executable resolved once at module load
export const PYTHON_CMD = getPythonCommand();
/**
 * Work out which python executable to invoke.
 *
 * Precedence: PYTHON_CMD, then CONDA_PYTHON_EXE, then plain "python".
 *
 * @returns {String} Python command or path
 */
export function getPythonCommand() {
  return process.env.PYTHON_CMD || process.env.CONDA_PYTHON_EXE || "python";
}
// Command names for external tools. Each one may be overridden through the
// environment variable of the same name; an unset or empty variable keeps
// the default.
export let DOTNET_CMD = process.env.DOTNET_CMD || "dotnet";
export let NODE_CMD = process.env.NODE_CMD || "node";
export let NPM_CMD = process.env.NPM_CMD || "npm";
export let YARN_CMD = process.env.YARN_CMD || "yarn";
export let GCC_CMD = process.env.GCC_CMD || "gcc";
export let RUSTC_CMD = process.env.RUSTC_CMD || "rustc";
export let GO_CMD = process.env.GO_CMD || "go";
export let CARGO_CMD = process.env.CARGO_CMD || "cargo";
// Clojure CLI
export let CLJ_CMD = process.env.CLJ_CMD || "clj";
export let LEIN_CMD = process.env.LEIN_CMD || "lein";
// Temporary directory name; defaults to the relative name "temp"
export let CDXGEN_TEMP_DIR = process.env.CDXGEN_TEMP_DIR || "temp";
// On a mac, use xcrun
// xcrun: Find and execute the named command line tool from the active developer directory
// An explicit SWIFT_CMD always wins. The parentheses are required: the
// conditional operator binds more loosely than ||, so without them a
// user-supplied SWIFT_CMD would be discarded and replaced by "xcrun swift".
export const SWIFT_CMD =
  process.env.SWIFT_CMD || (isMac ? "xcrun swift" : "swift");
// Ruby command; may be overridden with the RUBY_CMD environment variable
export const RUBY_CMD = process.env.RUBY_CMD || "ruby";
// Python components that can be excluded
// The list holds pip, setuptools, wheel and a set of conda packages.
export const PYTHON_EXCLUDED_COMPONENTS = [
"pip",
"setuptools",
"wheel",
"conda",
"conda-build",
"conda-index",
"conda-libmamba-solver",
"conda-package-handling",
"conda-package-streaming",
"conda-content-trust",
];
// Project type aliases
// Maps a base project type (the key) to every alias accepted for it.
// hasAnyProjectType() walks this table to translate the user-supplied include
// and exclude types into base types.
// NOTE: some aliases are listed under more than one key (for example "sbt" and
// "scala" under both java and scala, "macos" under both os and swift, "swift"
// and "ios" under both swift and cocoa), so one alias can select several base types.
export const PROJECT_TYPE_ALIASES = {
java: [
"java",
"java8",
"java11",
"java17",
"java21",
"java22",
"java23",
"java24",
"groovy",
"kotlin",
"kt",
"scala",
"jvm",
"gradle",
"mvn",
"maven",
"sbt",
"bazel",
"quarkus",
],
android: ["android", "apk", "aab"],
jar: ["jar", "war", "ear"],
"gradle-index": ["gradle-index", "gradle-cache"],
"sbt-index": ["sbt-index", "sbt-cache"],
"maven-index": ["maven-index", "maven-cache", "maven-core"],
js: [
"npm",
"pnpm",
"nodejs",
"nodejs8",
"nodejs10",
"nodejs12",
"nodejs14",
"nodejs16",
"nodejs18",
"nodejs20",
"nodejs22",
"nodejs23",
"node",
"node8",
"node10",
"node12",
"node14",
"node16",
"node18",
"node20",
"node22",
"node23",
"js",
"javascript",
"typescript",
"ts",
"tsx",
"vsix",
"yarn",
"rush",
],
py: [
"py",
"python",
"pypi",
"python36",
"python38",
"python39",
"python310",
"python311",
"python312",
"python313",
"pixi",
"pip",
"poetry",
"uv",
"pdm",
"hatch",
],
go: ["go", "golang", "gomod", "gopkg"],
rust: ["rust", "rust-lang", "cargo"],
php: ["php", "composer", "wordpress"],
ruby: ["ruby", "gems", "rubygems", "bundler", "rb", "gemspec"],
csharp: [
"csharp",
"netcore",
"netcore2.1",
"netcore3.1",
"dotnet",
"dotnet6",
"dotnet7",
"dotnet8",
"dotnet9",
"dotnet-framework",
"dotnet-framework47",
"dotnet-framework48",
"vb",
"fsharp",
"twincat",
"csproj",
"tsproj",
"vbproj",
"sln",
"fsproj",
"plcproj",
"hmiproj",
],
dart: ["dart", "flutter", "pub"],
haskell: ["haskell", "hackage", "cabal"],
elixir: ["elixir", "hex", "mix"],
c: ["c", "cpp", "c++", "conan"],
clojure: ["clojure", "edn", "clj", "leiningen"],
github: ["github", "actions"],
os: ["os", "osquery", "windows", "linux", "mac", "macos", "darwin"],
jenkins: ["jenkins", "hpi"],
helm: ["helm", "charts"],
"helm-index": ["helm-index", "helm-repo"],
universal: [
"universal",
"containerfile",
"docker-compose",
"dockerfile",
"swarm",
"tekton",
"kustomize",
"operator",
"skaffold",
"kubernetes",
"openshift",
"yaml-manifest",
],
cloudbuild: ["cloudbuild"],
swift: [
"swift",
"ios",
"macos",
"swiftpm",
"ipados",
"tvos",
"watchos",
"visionos",
],
binary: ["binary", "blint"],
oci: ["docker", "oci", "container", "podman"],
cocoa: ["cocoa", "cocoapods", "objective-c", "swift", "ios"],
scala: ["scala", "scala3", "sbt", "mill"],
};
// Package manager aliases
// Maps a type name to the package managers it covers. isPackageManagerAllowed()
// consults it so that excluding the key (e.g. "scala") also excludes the
// listed package managers (e.g. "sbt").
export const PACKAGE_MANAGER_ALIASES = {
scala: ["sbt"],
};
/**
 * Check whether a feature flag is switched on.
 *
 * A feature counts as enabled when it is listed in cliOptions.featureFlags,
 * or when the environment variable named after it is "true" or "1". The
 * variable name is the upper-cased feature name; a second attempt replaces
 * hyphens with underscores first.
 *
 * @param {Object} cliOptions CLI options
 * @param {String} feature Feature flag
 *
 * @returns {Boolean} True if the feature is enabled
 */
export function isFeatureEnabled(cliOptions, feature) {
  if (cliOptions?.featureFlags?.includes(feature)) {
    return true;
  }
  const candidateEnvNames = [
    feature.toUpperCase(),
    feature.replaceAll("-", "_").toUpperCase(),
  ];
  return candidateEnvNames.some((envName) =>
    ["true", "1"].includes(process.env[envName]),
  );
}
/**
 * Decide whether any of the given project types is allowed by the include
 * (options.projectType) and exclude (options.excludeType) lists passed from
 * the CLI arguments.
 *
 * @param {Array|String} projectTypes project types to check; a comma-separated string is also accepted
 * @param {Object} options CLI options
 * @param {Boolean} defaultStatus Default return value if there are no types provided
 *
 * @returns {Boolean} True if the project types are allowed
 */
export function hasAnyProjectType(projectTypes, options, defaultStatus = true) {
  // Nothing to check, or no include/exclude filter at all: use the default
  if (!projectTypes) {
    return defaultStatus;
  }
  const includeTypes = options.projectType;
  const excludeTypes = options.excludeType;
  if (!includeTypes?.length && !excludeTypes?.length) {
    return defaultStatus;
  }
  // Convert string project types to an array
  const requestedTypes =
    typeof projectTypes === "string" || projectTypes instanceof String
      ? projectTypes.split(",")
      : projectTypes;
  // If only exclude type is specified, then do not allow oci type
  const onlyExcludeGiven = !includeTypes?.length && excludeTypes?.length;
  if ((requestedTypes?.length === 1 || !defaultStatus) && onlyExcludeGiven) {
    const containerLikeTypes = ["oci", "oci-dir", "os", "docker"];
    return (
      !containerLikeTypes.some((t) => requestedTypes.includes(t)) &&
      !excludeTypes.includes("oci")
    );
  }
  const allProjectTypes = [...requestedTypes];
  // Base types derived from the include list
  const baseProjectTypes = [];
  // Support for arbitrary versioned ruby type
  if (requestedTypes.filter((p) => p.startsWith("ruby")).length) {
    baseProjectTypes.push("ruby");
  }
  const baseExcludeTypes = [];
  // Lookup sets are built once instead of once per alias comparison
  const includeSet = new Set(options?.projectType);
  const requestedSet = new Set(requestedTypes);
  const excludeSet = new Set(options?.excludeType);
  for (const [baseType, aliases] of Object.entries(PROJECT_TYPE_ALIASES)) {
    if (aliases.some((alias) => includeSet.has(alias))) {
      baseProjectTypes.push(baseType);
    }
    if (aliases.some((alias) => requestedSet.has(alias))) {
      allProjectTypes.push(baseType);
    }
    if (aliases.some((alias) => excludeSet.has(alias))) {
      baseExcludeTypes.push(baseType);
    }
  }
  const allTypeSet = new Set(allProjectTypes);
  const shouldInclude =
    !includeTypes?.length ||
    includeTypes?.includes("universal") ||
    includeTypes?.some((pt) => allTypeSet.has(pt)) ||
    baseProjectTypes.some((pt) => allTypeSet.has(pt));
  if (shouldInclude && excludeTypes) {
    // Reject when an excluded base type matches either the include-derived
    // base types or the requested types (with their base types)
    const baseTypeSet = new Set(baseProjectTypes);
    return !baseExcludeTypes.some(
      (pt) => pt && (baseTypeSet.has(pt) || allTypeSet.has(pt)),
    );
  }
  return shouldInclude;
}
/**
 * Check whether the given package manager may be used.
 *
 * The package manager is rejected when any conflicting manager appears in
 * options.projectType, or when options.excludeType names it directly or
 * through PACKAGE_MANAGER_ALIASES.
 *
 * @param {String} name Package manager name
 * @param {Array} conflictingManagers List of package managers
 * @param {Object} options CLI options
 *
 * @returns {Boolean} True if the package manager is allowed
 */
export function isPackageManagerAllowed(name, conflictingManagers, options) {
  const hasConflict = [...conflictingManagers].some((manager) =>
    options?.projectType?.includes(manager),
  );
  if (hasConflict) {
    return false;
  }
  const isExcluded = options.excludeType?.some(
    (p) => p === name || PACKAGE_MANAGER_ALIASES[p]?.includes(name),
  );
  if (isExcluded) {
    return false;
  }
  thoughtLog(
    `**PACKAGE MANAGER**: Let's make use of the package manager '${name}', which is allowed.`,
  );
  return true;
}
// HTTP cache shared by every request made through cdxgenAgent
const gotHttpCache = new Map();
/**
 * Tell whether HTTP caching was switched off through CDXGEN_NO_CACHE.
 *
 * @returns {Boolean|String|undefined} true for "true" or "1", false for any
 * other non-empty value, the raw falsy value when unset or empty
 */
function isCacheDisabled() {
  const noCacheFlag = process.env.CDXGEN_NO_CACHE;
  if (!noCacheFlag) {
    return noCacheFlag;
  }
  return ["true", "1"].includes(noCacheFlag);
}
const cache = isCacheDisabled() ? undefined : gotHttpCache;
// Custom user-agent for cdxgen. Retries are disabled (limit 0).
export const cdxgenAgent = got.extend({
  headers: {
    "user-agent": `@CycloneDX/cdxgen ${_version}`,
  },
  cache,
  retry: {
    limit: 0,
  },
});
/**
 * Find files matching a glob pattern, skipping a default set of directories
 * (VCS metadata, virtual environments, examples, coverage output and so on).
 *
 * @param {string} dirPath Root directory for search
 * @param {string} pattern Glob pattern (eg: *.gradle)
 * @param {Object} options CLI options; options.exclude (Array) adds extra ignore patterns
 *
 * @returns {Array} Matching file paths as returned by getAllFilesWithIgnore
 */
export function getAllFiles(dirPath, pattern, options = {}) {
  const wantsPackageJson = pattern.includes("package.json");
  const wantsManifestOrLockFile = [
    "package.json",
    "package-lock.json",
    "yarn.lock",
    "pnpm-lock.yaml",
  ].some((fileName) => pattern.includes(fileName));
  const defaultIgnores = [
    "**/.hg/**",
    "**/.git/**",
    "**/venv/**",
    "**/examples/**",
    "**/site-packages/**",
    "**/flow-typed/**",
    "**/coverage/**",
    // Only ignore node_modules if the caller is not looking for package.json
    ...(wantsPackageJson ? [] : ["**/node_modules/**"]),
    // ignore docs only for non-lock file lookups
    ...(wantsManifestOrLockFile ? [] : ["**/docs/**"]),
  ];
  const ignoreList = Array.isArray(options?.exclude)
    ? defaultIgnores.concat(options.exclude)
    : defaultIgnores;
  return getAllFilesWithIgnore(dirPath, pattern, ignoreList);
}
/**
 * Find files matching a glob pattern while honouring an explicit ignore list.
 * Any error raised during the search is swallowed (and printed in debug
 * mode), in which case an empty list is returned.
 *
 * @param {string} dirPath Root directory for search
 * @param {string} pattern Glob pattern (eg: *.gradle)
 * @param {Array} ignoreList Directory patterns to ignore
 *
 * @returns {Array} Absolute paths of the matching files
 */
export function getAllFilesWithIgnore(dirPath, pattern, ignoreList) {
  try {
    const globOptions = {
      cwd: dirPath,
      absolute: true,
      nocase: true,
      nodir: true,
      // Dot files are matched only when the pattern itself starts with a dot
      dot: pattern.startsWith("."),
      follow: false,
      ignore: ignoreList,
    };
    const matches = globSync(pattern, globOptions);
    if (matches.length > 1) {
      thoughtLog(
        `Found ${matches.length} files for the pattern '${pattern}' at '${dirPath}'.`,
      );
    }
    return matches;
  } catch (err) {
    if (DEBUG_MODE) {
      console.error(err);
    }
    return [];
  }
}
/**
 * Re-encode a hex string as base64.
 *
 * @param {string} hexString hex string
 * @returns {string} base64 encoded string
 */
function toBase64(hexString) {
  const rawBytes = Buffer.from(hexString, "hex");
  return rawBytes.toString("base64");
}
/**
 * Return the current UTC time in YYYY-MM-DDTHH:MM:SSZ format.
 *
 * @returns {string} ISO formatted timestamp, without milliseconds.
 */
export function getTimestamp() {
  // toISOString() yields "...:SS.sssZ"; keep what precedes the fraction
  const [untilSeconds] = new Date().toISOString().split(".");
  return `${untilSeconds}Z`;
}
/**
 * Return the directory to use for temporary files.
 *
 * When the CDXGEN_TEMP_DIR environment variable is set, that directory is
 * created (recursively) if it does not exist yet and is returned; otherwise
 * the operating system's temporary directory is returned.
 *
 * @returns {string} Temporary directory path
 */
export function getTmpDir() {
  const customTmpDir = process.env.CDXGEN_TEMP_DIR;
  if (!customTmpDir) {
    return tmpdir();
  }
  if (!safeExistsSync(customTmpDir)) {
    safeMkdirSync(customTmpDir, { recursive: true });
  }
  return customTmpDir;
}
/**
 * Heuristically decide whether a license string is an SPDX license
 * expression rather than a single license id: it is treated as an expression
 * when it contains whitespace or an opening parenthesis, or ends with "+".
 *
 * @param {string} license License string
 * @returns {boolean} true if the license looks like an SPDX license expression
 * @see https://spdx.dev/learn/handling-license-info/
 **/
export function isSpdxLicenseExpression(license) {
  if (!license) {
    return false;
  }
  const hasOperatorOrGroup = /[(\s]/.test(license);
  return hasOperatorOrGroup || license.endsWith("+");
}
/**
 * Convert the array of licenses to a CycloneDX 1.5 compliant license array.
 * This should return an array containing:
 * - one or more SPDX license if no expression is present
 * - the license of the expression if one expression is present
 * - a unified conditional 'OR' license expression if more than one expression is present
 *
 * Entries that are undefined or null are skipped instead of raising a
 * TypeError, so callers may pass the raw result of a map() that yields
 * undefined for unusable items.
 *
 * @param {Array} licenses Array of licenses
 * @returns {Array} CycloneDX 1.5 compliant license array; empty when the input is not an array
 */
export function adjustLicenseInformation(licenses) {
  if (!Array.isArray(licenses)) {
    return [];
  }
  const entries = licenses.filter((l) => l !== undefined && l !== null);
  const expressions = entries.map((l) => l.expression).filter(Boolean);
  if (expressions.length > 1) {
    return [{ expression: expressions.join(" OR ") }];
  }
  if (expressions.length === 1) {
    return [{ expression: expressions[0] }];
  }
  // No expression present: make sure every entry is wrapped as { license }
  return entries.map((l) =>
    typeof l.license === "object" ? l : { license: l },
  );
}
/**
 * Performs a lookup + validation of the license specified in the
 * package. If the license is a valid SPDX license ID, set the 'id'
 * and url of the license object, otherwise, set the 'name' of the license
 * object.
 *
 * Unusable entries (undefined, null, empty objects and other values that
 * carry no keys) are dropped rather than forwarded, so a package declaring
 * e.g. `license: {}` no longer makes the conversion throw.
 *
 * @param {Object} pkg Package object; `license` (string, object or array), and
 * the fields used by getKnownLicense and addLicenseText, are read from it
 * @returns {Array|undefined} License array produced by adjustLicenseInformation,
 * a single known license when the package declares none, or undefined
 */
export function getLicenses(pkg) {
  let license = pkg.license && (pkg.license.type || pkg.license);
  if (!license) {
    // No declared license: fall back to the known-licenses table
    const knownLicense = getKnownLicense(undefined, pkg);
    return knownLicense ? [{ license: knownLicense }] : undefined;
  }
  if (!Array.isArray(license)) {
    license = [license];
  }
  const licenseContents = [];
  for (const l of license) {
    if (l === undefined || l === null) {
      continue;
    }
    let licenseContent = {};
    if (typeof l === "string" || l instanceof String) {
      if (spdxLicenses.includes(l)) {
        licenseContent.id = l;
        licenseContent.url = `https://opensource.org/licenses/${l}`;
      } else if (l.startsWith("http")) {
        const knownLicense = getKnownLicense(l, pkg);
        if (knownLicense) {
          licenseContent.id = knownLicense.id;
          licenseContent.name = knownLicense.name;
        }
        // We always need a name to avoid validation errors
        // Issue: #469
        if (!licenseContent.name && !licenseContent.id) {
          licenseContent.name = "CUSTOM";
        }
        licenseContent.url = l;
      } else if (isSpdxLicenseExpression(l)) {
        licenseContent.expression = l;
      } else {
        licenseContent.name = l;
      }
    } else if (Object.keys(l).length) {
      licenseContent = l;
    } else {
      // Nothing usable in this entry (e.g. an empty object)
      continue;
    }
    if (!licenseContent.id) {
      addLicenseText(pkg, l, licenseContent);
    }
    licenseContents.push(licenseContent);
  }
  return adjustLicenseInformation(licenseContents);
}
/**
 * Look up a license in the SPDX list and in known-licenses.json.
 *
 * opensource.org and apache.org license urls are first reduced to a candidate
 * SPDX id. If that does not match, the known-licenses groups are searched by
 * package group/name and by url fragments.
 *
 * @param {String} licenseUrl License url (may be undefined)
 * @param {Object} pkg Package object; its purl, group and name are used for matching
 * @return {Object} Object with SPDX license id or license name; undefined when nothing matches
 */
export function getKnownLicense(licenseUrl, pkg) {
  // Step 1: derive a candidate SPDX id from well-known license hosts
  let possibleId;
  if (licenseUrl?.includes("opensource.org")) {
    possibleId = licenseUrl
      .toLowerCase()
      .replace("https://", "http://")
      .replace("http://www.opensource.org/licenses/", "");
  } else if (licenseUrl?.includes("apache.org")) {
    possibleId = licenseUrl
      .toLowerCase()
      .replace("https://", "http://")
      .replace("http://www.apache.org/licenses/license-", "apache-")
      .replace(".txt", "");
  }
  if (possibleId !== undefined) {
    const spdxMatch = spdxLicenses.find(
      (spdxLicense) => spdxLicense.toLowerCase() === possibleId,
    );
    if (spdxMatch !== undefined) {
      return { id: spdxMatch };
    }
  }
  // Step 2: search the curated known-licenses groups
  for (const licenseGroup of knownLicenses) {
    const namespaceApplies =
      licenseGroup.packageNamespace === "*" ||
      pkg.purl?.startsWith(licenseGroup.packageNamespace);
    if (!namespaceApplies) {
      continue;
    }
    for (const known of licenseGroup.knownLicenses) {
      let isMatch = false;
      if (known.group && known.name) {
        // "." as group means: match on the package name alone
        const matchesUngrouped = known.group === "." && known.name === pkg.name;
        isMatch =
          matchesUngrouped ||
          (pkg.group?.includes(known.group) &&
            (known.name === pkg.name || known.name === "*"));
      }
      if (!isMatch) {
        isMatch =
          (known.urlIncludes &&
            licenseUrl &&
            licenseUrl.includes(known.urlIncludes)) ||
          (known.urlEndswith &&
            licenseUrl &&
            licenseUrl.endsWith(known.urlEndswith));
      }
      if (isMatch) {
        return { id: known.license, name: known.licenseName };
      }
    }
  }
  return undefined;
}
/**
 * Tries to find a file containing the license text based on commonly
 * used naming and content types. If a candidate file is found, its text is
 * stored in licenseContent.text and the search stops.
 *
 * @param {Object} pkg Package object; pkg.realPath is the directory that is searched
 * @param {String|Object} l License value, used as a file name suffix (e.g. 'LICENSE.Apache-2.0')
 * @param {Object} licenseContent License object that receives the `text` attribute
 */
export function addLicenseText(pkg, l, licenseContent) {
  const baseNames = [
    "LICENSE",
    "License",
    "license",
    "LICENCE",
    "Licence",
    "licence",
    "NOTICE",
    "Notice",
    "notice",
  ];
  // [content type, file extension] pairs, tried in this order
  const contentTypeExtensions = [
    ["text/plain", ""],
    ["text/txt", ".txt"],
    ["text/markdown", ".md"],
    ["text/xml", ".xml"],
  ];
  // Start from the license-specific naming and proceed towards generic names
  const nameSuffixes = [`.${l}`, ""];
  for (const suffix of nameSuffixes) {
    for (const baseName of baseNames) {
      for (const [contentType, extension] of contentTypeExtensions) {
        const candidate = `${pkg.realPath}/${baseName}${suffix}${extension}`;
        if (!safeExistsSync(candidate)) {
          continue;
        }
        licenseContent.text = readLicenseText(candidate, contentType);
        return;
      }
    }
  }
}
/**
 * Read the license file at the given path into a license text object.
 * The contentType attribute is included only when it differs from the
 * default "text/plain".
 *
 * @param {String} licenseFilepath Path of the license file
 * @param {String} licenseContentType Content type of the file
 * @returns {Object|null} License text object, or null when the file is empty
 */
export function readLicenseText(licenseFilepath, licenseContentType) {
  const content = readFileSync(licenseFilepath, "utf8");
  if (!content) {
    return null;
  }
  return licenseContentType === "text/plain"
    ? { content }
    : { content, contentType: licenseContentType };
}
/**
 * Enrich swift packages with license information. Only packages whose
 * repository url points to github.com are looked up; a failed lookup is
 * reported on stderr and the package is kept as it is.
 *
 * @param {Array} pkgList Package list
 * @returns {Promise<Array>} New array holding the same (possibly updated) package objects
 */
export async function getSwiftPackageMetadata(pkgList) {
  const enrichedList = [];
  for (const p of pkgList) {
    const repoUrl = p.repository?.url;
    if (!repoUrl) {
      if (DEBUG_MODE) {
        console.warn("no repository url found for", p.name);
      }
    } else if (repoUrl.includes("://github.com/")) {
      try {
        p.license = await getRepoLicense(p.repository.url, undefined);
      } catch (e) {
        console.error("error fetching repo license from", p.repository.url);
      }
    } else if (DEBUG_MODE) {
      console.log(
        p.repository.url,
        "is currently not supported to fetch for licenses",
      );
    }
    enrichedList.push(p);
  }
  return enrichedList;
}
/**
 * Retrieve metadata for npm packages by querying the npm registry
 * (NPM_URL environment variable, https://registry.npmjs.org/ by default).
 * Registry responses are kept in metadata_cache, keyed by package name.
 * Packages are processed one after the other; a package whose lookup fails
 * is returned without the additional metadata.
 *
 * @param {Array} pkgList Package list
 * @returns {Promise<Array>} New array holding the same (possibly updated) package objects
 */
export async function getNpmMetadata(pkgList) {
  const registryUrl = process.env.NPM_URL || "https://registry.npmjs.org/";
  const enrichedList = [];
  for (const p of pkgList) {
    try {
      // Scoped packages are looked up as "@scope/name"
      let cacheKey = p.name;
      if (p.group) {
        const scope = p.group.startsWith("@") ? p.group : `@${p.group}`;
        cacheKey = `${scope}/${p.name}`;
      }
      let body = metadata_cache[cacheKey];
      if (!body) {
        const res = await cdxgenAgent.get(registryUrl + cacheKey, {
          responseType: "json",
        });
        body = res.body;
        metadata_cache[cacheKey] = body;
      }
      // Prefer the metadata of the exact version over the package-level one
      const versionInfo = body.versions?.[p.version];
      p.description = versionInfo?.description || body.description;
      p.license =
        versionInfo?.license ||
        body.license ||
        (await getRepoLicense(body.repository?.url, undefined));
      if (body.repository?.url) {
        p.repository = { url: body.repository.url };
      }
      if (body.homepage) {
        p.homepage = { url: body.homepage };
      }
    } catch (err) {
      if (DEBUG_MODE) {
        console.error(p, "was not found on npm");
      }
    }
    enrichedList.push(p);
  }
  return enrichedList;
}
/**
 * Parse a nodejs package.json file into a list holding a single component.
 * A missing or unparsable file yields an empty list.
 *
 * @param {string} pkgJsonFile package.json file
 * @param {boolean} simple Return a simpler representation of the component by skipping extended attributes and license fetch.
 * @returns {Promise<Array>} Package list
 */
export async function parsePkgJson(pkgJsonFile, simple = false) {
  const pkgList = [];
  if (!safeExistsSync(pkgJsonFile)) {
    return pkgList;
  }
  try {
    const pkgData = JSON.parse(readFileSync(pkgJsonFile, "utf8"));
    const { fullName, scope } = parsePackageJsonName(pkgData.name);
    let name = fullName || pkgData.name;
    // Unnamed packages outside node_modules are named after their directory
    if (!name && !pkgJsonFile.includes("node_modules")) {
      name = basename(dirname(pkgJsonFile));
    }
    const group = scope || "";
    const purl = new PackageURL(
      "npm",
      group,
      name,
      pkgData.version,
      null,
      null,
    ).toString();
    // The author may be a plain string or an object with name/email/url
    const { author } = pkgData;
    let authorString = author;
    if (author instanceof Object) {
      const emailPart = author.email ? ` <${author.email}>` : "";
      const urlPart = author.url ? ` (${author.url})` : "";
      authorString = `${author.name}${emailPart}${urlPart}`;
    }
    const apkg = {
      name,
      group,
      version: pkgData.version,
      description: pkgData.description,
      purl,
      "bom-ref": decodeURIComponent(purl),
      author: authorString,
      license: pkgData.license,
    };
    if (pkgData.homepage) {
      apkg.homepage = { url: pkgData.homepage };
    }
    if (pkgData.repository?.url) {
      apkg.repository = { url: pkgData.repository.url };
    }
    if (!simple) {
      apkg.properties = [{ name: "SrcFile", value: pkgJsonFile }];
      const manifestEvidence = {
        technique: "manifest-analysis",
        confidence: 0.7,
        value: pkgJsonFile,
      };
      apkg.evidence = {
        identity: {
          field: "purl",
          confidence: 0.7,
          methods: [manifestEvidence],
        },
      };
    }
    pkgList.push(apkg);
  } catch (err) {
    // continue regardless of error
  }
  if (simple || !pkgList.length || !shouldFetchLicense()) {
    return pkgList;
  }
  if (DEBUG_MODE) {
    console.log(
      `About to fetch license information for ${pkgList.length} packages in parsePkgJson`,
    );
  }
  return await getNpmMetadata(pkgList);
}
/**
* Parse nodejs package lock file
*
* @param {string} pkgLockFile package-lock.json file
* @param {object} options Command line options
*/
export async function parsePkgLock(pkgLockFile, options = {}) {
let pkgList = [];
let dependenciesList = [];
if (!options) {
options = {};
}
const pkgSpecVersionCache = {};
if (!safeExistsSync(pkgLockFile)) {
return {
pkgList,
dependenciesList,
};
}
const parseArboristNode = (
node,
rootNode,
parentRef = null,
visited = new Set(),
pkgSpecVersionCache = {},
options = {},
) => {
if (visited.has(node)) {
return { pkgList: [], dependenciesList: [] };
}
visited.add(node);
let pkgList = [];
let dependenciesList = [];
// Create the package entry
const srcFilePath = node.path.includes(`${_sep}node_modules`)
? node.path.split(`${_sep}node_modules`)[0]
: node.path;
const scope = node.dev === true ? "optional" : undefined;
const integrity = node.integrity ? node.integrity : undefined;
let pkg;
let purlString;
const author = node.package.author;
const authorString =
author instanceof Object
? `${author.name}${author.email ? ` <${author.email}>` : ""}${
author.url ? ` (${author.url})` : ""
}`
: author;
if (node === rootNode) {
purlString = new PackageURL(
"npm",
options.projectGroup || "",
"project-name" in options ? options.projectName : node.packageName,
options.projectVersion || node.version,
null,
null,
)
.toString()
.replace(/%2F/g, "/");
pkg = {
author: authorString,
group: options.projectGroup || "",
name:
"project-name" in options ? options.projectName : node.packageName,
version: options.projectVersion || node.version,
type: "application",
purl: purlString,
"bom-ref": decodeURIComponent(purlString),
};
} else {
purlString = new PackageURL(
"npm",
"",
node.packageName,
node.version,
null,
null,
)
.toString()
.replace(/%2F/g, "/");
const pkgLockFile = join(
srcFilePath.replace("/", _sep),
"package-lock.json",
);
pkg = {
group: "",
name: node.packageName,
version: node.version,
author: authorString,
scope: scope,
_integrity: integrity,
externalReferences: [],
properties: [
{
name: "SrcFile",
value: pkgLockFile,
},
],
evidence: {
identity: {
field: "purl",
confidence: 1,
methods: [
{
technique: "manifest-analysis",
confidence: 1,
value: pkgLockFile,
},
],
},
},
type: parentRef ? "npm" : "application",
purl: purlString,
"bom-ref": decodeURIComponent(purlString),
};
if (node.resolved) {
if (node.resolved.startsWith("file:")) {
pkg.properties.push({
name: "cdx:npm:resolvedPath",
value: node.realpath
? relative(dirname(pkgLockFile), node.realpath)
: relative(
dirname(pkgLockFile),
resolve(node.resolved.replace("file:", "")),
),
});
} else {
pkg.properties.push({
name: "ResolvedUrl",
value: node.resolved,
});
pkg.distribution = { url: node.resolved };
}
}
if (node.location) {
pkg.properties.push({
name: "LocalNodeModulesPath",
value: node.location,
});
}
if (node?.installLinks) {
pkg.properties.push({
name: "cdx:npm:installLinks",
value: "true",
});
}
if (node?.binPaths?.length) {
pkg.properties.push({
name: "cdx:npm:binPaths",
value: node.binPaths.join(", "),
});
}
if (node?.hasInstallScript) {
pkg.properties.push({
name: "cdx:npm:hasInstallScript",
value: "true",
});
}
if (node?.isLink) {
pkg.properties.push({
name: "cdx:npm:isLink",
value: "true",
});
}
// This getter method could fail with errors at times.
// Example Error: Invalid tag name "^>=6.0.0" of package "^>=6.0.0": Tags may not have any characters that encodeURIComponent encodes.
try {
if (!node?.isRegistryDependency) {
pkg.properties.push({
name: "cdx:npm:isRegistryDependency",
value: "false",
});
}
} catch (err) {
// ignore
}
if (node?.isWorkspace) {
pkg.properties.push({
name: "cdx:npm:isWorkspace",
value: "true",
});
}
if (node?.inBundle) {
pkg.properties.push({
name: "cdx:npm:inBundle",
value: "true",
});
}
if (node?.inDepBundle) {
pkg.properties.push({
name: "cdx:npm:inDepBundle",
value: "true",
});
}
if (node.package?.repository?.url) {
pkg.externalReferences.push({
type: "vcs",
url: node.package.repository.url,
});
}
if (node.package?.bugs?.url) {
pkg.externalReferences.push({
type: "issue-tracker",
url: node.package.bugs.url,
});
}
if (node?.package?.keywords?.length) {
pkg.tags = Array.isArray(node.package.keywords)
? node.package.keywords.sort()
: node.package.keywords.split(",");
}
}
if (node.package?.license) {
// License will be overridden if shouldFetchLicense() is enabled
pkg.license = node.package.license;
}
const deprecatedMessage = node.package?.deprecated;
if (deprecatedMessage) {
pkg.properties.push({
name: "cdx:npm:deprecated",
value: deprecatedMessage,
});
}
pkgList.push(pkg);
// retrieve workspace node pkglists
const workspaceDependsOn = [];
if (node.fsChildren && node.fsChildren.size > 0) {
for (const workspaceNode of node.fsChildren) {
const {
pkgList: childPkgList,
dependenciesList: childDependenciesList,
} = parseArboristNode(
workspaceNode,
rootNode,
purlString,
visited,
pkgSpecVersionCache,
options,
);
pkgList = pkgList.concat(childPkgList);
dependenciesList = dependenciesList.concat(childDependenciesList);
let depWorkspacePurlString = decodeURIComponent(
new PackageURL(
"npm",
"",
workspaceNode.name,
workspaceNode.version,
null,
null,
)
.toString()
.replace(/%2F/g, "/"),
);
let purlStringFromPkgid;
if (workspaceNode.pkgid) {
purlStringFromPkgid = `pkg:npm/${workspaceNode.pkgid.replace(`${workspaceNode.name}@npm:`, "")}`;
}
if (
purlStringFromPkgid &&
purlStringFromPkgid !== depWorkspacePurlString
) {
if (DEBUG_MODE) {
console.log(
`Internal warning: Got two different refs for this workspace node: ${depWorkspacePurlString} and ${purlStringFromPkgid}. Assuming the bom-ref as ${purlStringFromPkgid} based on pkgid.`,
);
}
depWorkspacePurlString = purlStringFromPkgid;
}
if (decodeURIComponent(purlString) !== depWorkspacePurlString) {
workspaceDependsOn.push(depWorkspacePurlString);
}
}
}
// This handles the case when a node has a ["dependencies"] key in package-lock.json.
// We exclude the root node because it has already been handled.
// If the node has "requires" (outgoing edges), we don't have to track the "dependencies".
const childrenDependsOn = [];
if (node !== rootNode && !node.edgesOut.size) {
for (const child of node.children) {
const childNode = child[1];
const {
pkgList: childPkgList,
dependenciesList: childDependenciesList,
} = parseArboristNode(
childNode,
rootNode,
decodeURIComponent(purlString),
visited,
pkgSpecVersionCache,
options,
);
pkgList = pkgList.concat(childPkgList);
dependenciesList = dependenciesList.concat(childDependenciesList);
const depChildString = decodeURIComponent(
new PackageURL(
"npm",
"",
childNode.name,
childNode.version,
null,
null,
)
.toString()
.replace(/%2F/g, "/"),
);
if (decodeURIComponent(purlString) !== depChildString) {
childrenDependsOn.push(depChildString);
}
}
}
// this handles the case when a node has a ["requires"] key
const pkgDependsOn = [];
for (const edge of node.edgesOut.values()) {
let targetVersion;
let targetName;
let foundMatch = false;
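// Cache the resolved version for this name and spec. It is used as a fallback
// when a later lookup by integrity or location fails to find the version.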
if (edge?.to?.version && edge?.spec) {
pkgSpecVersionCache[`${edge.name}-${edge.spec}`] = edge.to.version;
}
// If the edge doesn't have an integrity, it's likely a peer dependency
// which isn't installed.
// Bug #795: At times, npm loses the integrity node completely and such packages get missed.
// To keep things safe, we include these packages.
let edgeToIntegrityOrLocation = edge.to ? edge.to.integrity : undefined;
// Fallback to location based lookups when integrity is missing
if (!edgeToIntegrityOrLocation && edge.to && edge.to.location) {
edgeToIntegrityOrLocation = edge.to.location;
}
if (!edgeToIntegrityOrLocation) {
// This hack is required to fix the package name
targetName = edge.name.replace(/-cjs$/, "");
foundMatch = false;
} else {
// the edges don't actually contain a version, so we need to search the root node
// children to find the correct version. we check the node children first, then
// we check the root node children
for (const child of node.children) {
if (edgeToIntegrityOrLocation) {
if (
child[1].integrity === edgeToIntegrityOrLocation ||
child[1].location === edgeToIntegrityOrLocation
) {
targetName = child[0].replace(/node_modules\//g, "");
// The package name could be different from the targetName retrieved
// Eg: "string-width-cjs": "npm:string-width@^4.2.0",
if (child[1].packageName && child[1].packageName !== targetName) {
targetName = child[1].packageName;
}
targetVersion = child[1].version;
foundMatch = true;
break;
}
}
}
}
if (!foundMatch) {
for (const child of rootNode.children) {
if (
edgeToIntegrityOrLocation &&
(child[1].integrity === edgeToIntegrityOrLocation ||
child[1].location === edgeToIntegrityOrLocation)
) {
targetName = child[0].replace(/node_modules\//g, "");
targetVersion = child[1].version;
// The package name could be different from the targetName retrieved
// "string-width-cjs": "npm:string-width@^4.2.0",
if (child[1].packageName && child[1].packageName !== targetName) {
targetName = child[1].packageName;
}
break;
}
}
if (!targetVersion || !targetName) {
if (pkgSpecVersionCache[`${edge.name}-${edge.spec}`]) {
targetVersion = pkgSpecVersionCache[`${edge.name}-${edge.spec}`];
targetName = edge.name;
}
}
}
// if we can't find the version of the edge, continue
// it may be an optional peer dependency
if (!targetVersion || !targetName) {
if (
DEBUG_MODE &&
!options.deep &&
!["optional", "peer", "peerOptional"].includes(edge?.type)
) {
if (!targetVersion) {
console.log(
`Unable to determine the version for the dependency ${edge.name} from the path ${edge?.from?.path}. This is likely an edge case that is not handled.`,
edge,
);
} else if (!targetName) {
console.log(
`Unable to determine the name for the dependency from the edge from the path ${edge?.from?.path}. This is likely an edge case that is not handled.`,
edge,
);
}
}
// juice-shop
// Lock files created with --legacy-peer-deps will have certain peer dependencies missing
// This flags any non-missing peers
if (DEBUG_MODE && edge?.type === "peer" && edge?.error !== "MISSING") {
console.log(
`Unable to determine the version for the dependency ${edge.name} from the path ${edge?.from?.path}. This is likely an edge case that is not handled.`,
edge,
);
}
continue;
}
const depPurlString = decodeURIComponent(
new PackageURL("npm", "", targetName, targetVersion, null, null)
.toString()
.replace(/%2F/g, "/"),
);
if (decodeURIComponent(purlString) !== depPurlString) {
pkgDependsOn.push(depPurlString);
}
if (edge.to == null) {
continue;
}
const { pkgList: childPkgList, dependenciesList: childDependenciesList } =
parseArboristNode(
edge.to,
rootNode,
decodeURIComponent(purlString),
visited,
pkgSpecVersionCache,
options,
);
pkgList = pkgList.concat(childPkgList);
dependenciesList = dependenciesList.concat(childDependenciesList);
}
dependenciesList.push({
ref: decodeURIComponent(purlString),
dependsOn: [
...new Set(
workspaceDependsOn.concat(childrenDependsOn).concat(pkgDependsOn),
),
].sort(),
});
return { pkgList, dependenciesList };
};
let arb = new Arborist({
path: path.dirname(pkgLockFile),
// legacyPeerDeps=false enables npm >v3 package dependency resolution
legacyPeerDeps: false,
});
let tree = undefined;
try {
const rootNodeModulesDir = join(path.dirname(pkgLockFile), "node_modules");
if (safeExistsSync(rootNodeModulesDir)) {
if (options.deep) {
console.log(
`Constructing the actual dependency hierarchy from ${rootNodeModulesDir}.`,
);
tree = await arb.loadActual();
} else {
if (DEBUG_MODE) {
console.log(
"Constructing virtual dependency tree based on the lock file. Pass --deep argument to construct the actual dependency tree from disk.",
);
}
tree = await arb.loadVirtual();
}
} else {
tree = await arb.loadVirtual();
}
} catch (e) {
console.log(
`Unable to parse ${pkgLockFile} without legacy peer dependencies. Retrying ...`,
);
if (DEBUG_MODE) {
console.log(e);
}
try {
arb = new Arborist({
path: path.dirname(pkgLockFile),
legacyPeerDeps: true,
});
tree = await arb.loadVirtual();
} catch (e) {
console.log(
`Unable to parse ${pkgLockFile} in legacy and non-legacy mode. The resulting SBOM would be incomplete.`,
);
if (DEBUG_MODE) {
console.log(e);
}
return { pkgList, dependenciesList };
}
}
if (!tree) {
return { pkgList, dependenciesList };
}
({ pkgList, dependenciesList } = parseArboristNode(
tree,
tree,
null,
new Set(),
pkgSpecVersionCache,
options,
));
if (shouldFetchLicense() && pkgList && pkgList.length) {
if (DEBUG_MODE) {
console.log(
`About to fetch license information for ${pkgList.length} packages in parsePkgLock`,
);
}
pkgList = await getNpmMetadata(pkgList);
return { pkgList, dependenciesList };
}
return {
pkgList,
dependenciesList,
};
}
/**
* Given the contents of a yarn lock file, this method returns an Object keyed by
* package identity (in the form "name|range"), with the value parsed for that identity.
* eg: "@actions/core@^1.2.6", "@actions/core@^1.6.0":
* version "1.6.0"
* would result in two entries, one for each identity
*
* @param {string} lockData Yarn Lockfile data
*/
export function yarnLockToIdentMap(lockData) {
const identMap = {};
let currentIdents = [];
lockData.split("\n").forEach((l) => {
l = l.replace("\r", "");
if (l === "\n" || !l.length || l.startsWith("#")) {
return;
}
// "@actions/core@^1.2.6", "@actions/core@^1.6.0":
if (!l.startsWith(" ") && l.trim().length > 0) {
const tmpA = l.replace(/["']/g, "").split(", ");
if (tmpA?.length) {
for (let s of tmpA) {
if (!s.startsWith("__")) {
if (s.endsWith(":")) {
s = s.substring(0, s.length - 1);
}
// Non-strict mode parsing
const match = s.match(/^(?:(@[^/]+?)\/)?([^/]+?)(?:@(.+))?$/);
if (!match) {
continue;
}
let [, group, name, range] = match;
if (group) {
group = `${group}/`;
}
// "lru-cache@npm:^6.0.0":
// "string-width-cjs@npm:string-width@^4.2.0":
// Here range can be
// - npm:^6.0.0
// - npm:@types/ioredis@^4.28.10
// - npm:strip-ansi@^6.0.1
// See test cases with yarn3.lock and yarn6.lock
if (range?.startsWith("npm:")) {
if (range.includes("@")) {
range = range.split("@").slice(-1)[0];
} else {
range = range.replace("npm:", "");
}
}
currentIdents.push(`${group || ""}${name}|${range}`);
}
}
}
} else if (
(l.startsWith(" version") || l.startsWith(' "version')) &&
currentIdents.length
) {
const tmpA = l.replace(/"/g, "").split(" ");
const version = tmpA[tmpA.length - 1].trim();
for (const id of currentIdents) {
ident