UNPKG

@cyclonedx/cdxgen

Version:

Creates CycloneDX Software Bill of Materials (SBOM) from source or container image

1,545 lines (1,522 loc) 61.4 kB
import fs from "node:fs"; import path, { basename, join, resolve } from "node:path"; import process from "node:process"; import { PackageURL } from "packageurl-js"; import { Op } from "sequelize"; import { findCryptoAlgos } from "../helpers/cbomutils.js"; import * as db from "../helpers/db.js"; import { DEBUG_MODE, PROJECT_TYPE_ALIASES, collectGradleDependencies, collectMvnDependencies, executeAtom, getAllFiles, getGradleCommand, getMavenCommand, getTimestamp, getTmpDir, safeExistsSync, safeMkdirSync, } from "../helpers/utils.js"; import { postProcess } from "../stages/postgen/postgen.js"; import { findPurlLocations } from "./scalasem.js"; import { createSemanticsSlices } from "./swiftsem.js"; const DB_NAME = "evinser.db"; const typePurlsCache = {}; /** * Function to create the db for the libraries referred in the sbom. * * @param {Object} options Command line options */ export async function prepareDB(options) { if (!options.dbPath.includes("memory") && !safeExistsSync(options.dbPath)) { try { safeMkdirSync(options.dbPath, { recursive: true }); } catch (e) { // ignore } } const dirPath = options._[0] || "."; const bomJsonFile = options.input; if (!safeExistsSync(bomJsonFile)) { console.log( "Bom file doesn't exist. Check if cdxgen was invoked with the correct type argument.", ); if (!process.env.CDXGEN_DEBUG_MODE) { console.log( "Set the environment variable CDXGEN_DEBUG_MODE to debug to troubleshoot the issue further.", ); } return; } const bomJson = JSON.parse(fs.readFileSync(bomJsonFile, "utf8")); if (bomJson.specVersion < 1.5) { console.log( "Evinse requires the input SBOM in CycloneDX 1.5 format or above. You can generate one by invoking cdxgen without any --spec-version argument.", ); process.exit(0); } const components = bomJson.components || []; const { sequelize, Namespaces, Usages, DataFlows } = await db.createOrLoad( DB_NAME, options.dbPath, ); let hasMavenPkgs = false; // We need to slice only non-maven packages const purlsToSlice = {}; const purlsJars = {}; let usagesSlice = undefined; for (const comp of components) { if (!comp.purl) { continue; } usagesSlice = await Usages.findByPk(comp.purl); const namespaceSlice = await Namespaces.findByPk(comp.purl); if ((!usagesSlice && !namespaceSlice) || options.force) { if (comp.purl.startsWith("pkg:maven")) { hasMavenPkgs = true; } } } // If there are maven packages we collect and store the namespaces if (!options.skipMavenCollector && hasMavenPkgs) { const pomXmlFiles = getAllFiles(dirPath, "**/" + "pom.xml"); const gradleFiles = getAllFiles(dirPath, "**/" + "build.gradle*"); if (pomXmlFiles?.length) { await catalogMavenDeps(dirPath, purlsJars, Namespaces, options); } if (gradleFiles?.length) { await catalogGradleDeps(dirPath, purlsJars, Namespaces); } } for (const purl of Object.keys(purlsToSlice)) { await createAndStoreSlice(purl, purlsJars, Usages, options); } return { sequelize, Namespaces, Usages, DataFlows }; } export async function catalogMavenDeps( dirPath, purlsJars, Namespaces, options = {}, ) { let jarNSMapping = undefined; if (safeExistsSync(join(dirPath, "bom.json.map"))) { try { const mapData = JSON.parse( fs.readFileSync(join(dirPath, "bom.json.map"), "utf-8"), ); if (mapData && Object.keys(mapData).length) { jarNSMapping = mapData; } } catch (err) { // ignore } } if (!jarNSMapping) { console.log("About to collect jar dependencies for the path", dirPath); const mavenCmd = getMavenCommand(dirPath, dirPath); // collect all jars including from the cache if data-flow mode is enabled jarNSMapping = await collectMvnDependencies( mavenCmd, dirPath, false, options.withDeepJarCollector, ); } if (jarNSMapping) { for (const purl of Object.keys(jarNSMapping)) { purlsJars[purl] = jarNSMapping[purl].jarFile; await Namespaces.findOrCreate({ where: { purl }, defaults: { purl, data: JSON.stringify( { pom: jarNSMapping[purl].pom, namespaces: jarNSMapping[purl].namespaces, }, null, null, ), }, }); } } } export async function catalogGradleDeps(dirPath, purlsJars, Namespaces) { console.log( "About to collect jar dependencies from the gradle cache. This would take a while ...", ); const gradleCmd = getGradleCommand(dirPath, dirPath); // collect all jars including from the cache if data-flow mode is enabled const jarNSMapping = await collectGradleDependencies( gradleCmd, dirPath, false, true, ); if (jarNSMapping) { for (const purl of Object.keys(jarNSMapping)) { purlsJars[purl] = jarNSMapping[purl].jarFile; await Namespaces.findOrCreate({ where: { purl }, defaults: { purl, data: JSON.stringify( { pom: jarNSMapping[purl].pom, namespaces: jarNSMapping[purl].namespaces, }, null, null, ), }, }); } } console.log( "To speed up successive re-runs, pass the argument --skip-maven-collector to evinse command.", ); } export async function createAndStoreSlice( purl, purlsJars, Usages, options = {}, ) { const retMap = createSlice(purl, purlsJars[purl], "usages", options); let sliceData = undefined; if (retMap?.slicesFile && safeExistsSync(retMap.slicesFile)) { sliceData = await Usages.findOrCreate({ where: { purl }, defaults: { purl, data: fs.readFileSync(retMap.slicesFile, "utf-8"), }, }); } if (retMap?.tempDir?.startsWith(getTmpDir())) { fs.rmSync(retMap.tempDir, { recursive: true, force: true }); } return sliceData; } export async function createSlice( purlOrLanguages, filePath, sliceType = "usages", options = {}, ) { if (!filePath) { return {}; } const firstLanguage = Array.isArray(purlOrLanguages) ? purlOrLanguages[0] : purlOrLanguages; let language = firstLanguage.startsWith("pkg:") ? purlToLanguage(firstLanguage, filePath) : firstLanguage; if (!language) { return {}; } // Handle language with version types if (language.startsWith("ruby")) { language = "ruby"; } else if (language.startsWith("java") && language !== "javascript") { language = "java"; } else if (language.startsWith("node")) { language = "js"; } else if (language.startsWith("python")) { language = "python"; } else if (PROJECT_TYPE_ALIASES.scala.includes(language)) { language = "scala"; } if ( PROJECT_TYPE_ALIASES.swift.includes(language) && sliceType !== "semantics" ) { return {}; } let sliceOutputDir = fs.mkdtempSync(join(getTmpDir(), `atom-${sliceType}-`)); if (options?.output) { sliceOutputDir = safeExistsSync(options.output) && fs.lstatSync(options.output).isDirectory() ? path.basename(options.output) : path.dirname(options.output); } const slicesFile = options[`${sliceType}SlicesFile`] || join(sliceOutputDir, `${language}-${sliceType}.slices.json`); const openapiSpecFile = basename( options.openapiSpecFile || process.env?.ATOM_TOOLS_OPENAPI_FILENAME || join(sliceOutputDir, `${language}-openapi.json`), ); // For some languages such as scala, semantics slices file would get created during usages slicing. let semanticsSlicesFile; if (sliceType === "semantics") { const slicesData = createSemanticsSlices(resolve(filePath), options); // Write the semantics slices data if (slicesData) { fs.writeFileSync( slicesFile, JSON.stringify(slicesData, null, options.jsonPretty ? 2 : null), ); } return { tempDir: sliceOutputDir, slicesFile }; } console.log( `Creating ${sliceType} slice for ${resolve(filePath)}. Please wait ...`, ); const atomFile = join(sliceOutputDir, `${language}-app.atom`); let args = [sliceType]; // Support for crypto slices aka CBOM if (sliceType === "reachables" && options.includeCrypto) { args.push("--include-crypto"); } if (sliceType === "usages") { // Generate OpenAPI specification for endpoints. Needs atom-tools pypi package to be installed. args.push("--extract-endpoints"); if (process.env?.CDXGEN_IN_CONTAINER !== "true") { console.log( "Use an official cdxgen container image to improve the precision of endpoints detection (for SaaSBOM).", ); } if (["ruby", "scala"].includes(language)) { args.push("--remove-atom"); } if (["scala"].includes(language)) { semanticsSlicesFile = join( sliceOutputDir, basename(options.semanticsSlicesFile || "semantics.slices.json"), ); } } args = args.concat([ "-l", language, "-o", resolve(atomFile), "--slice-outfile", resolve(slicesFile), ]); // For projects with several layers, slice depth needs to be increased from the default 7 to 15 or 20 // This would increase the time but would yield more deeper paths if (sliceType === "data-flow" && process.env.ATOM_SLICE_DEPTH) { args.push("--slice-depth"); args.push(process.env.ATOM_SLICE_DEPTH); } args.push(resolve(filePath)); // Execute atom const result = executeAtom(filePath, args, { ATOM_TOOLS_OPENAPI_FILENAME: openapiSpecFile, // The file would get over-written ATOM_TOOLS_OPENAPI_FORMAT: process.env?.ATOM_TOOLS_OPENAPI_FORMAT || "openapi3.1.0", // editor.swagger.io doesn't support 3.1.0 yet ATOM_TOOLS_WORK_DIR: process.env?.ATOM_TOOLS_WORK_DIR || resolve(filePath), // This must be the directory containing semantics.slices.json OPENAPI_SERVER_URL: process.env?.OPENAPI_SERVER_URL, }); if (!result || !safeExistsSync(slicesFile)) { console.warn( `Unable to generate ${sliceType} slice using atom. Check if this is a supported language.`, ); if (!process.env?.CDXGEN_DEBUG_MODE) { console.log( "Set the environment variable CDXGEN_DEBUG_MODE=debug to troubleshoot.", ); } else { if (process.env?.CDXGEN_IN_CONTAINER === "true") { console.log( "TIP: Try creating the slices using the official atom container image `ghcr.io/appthreat/atom:main` directly. Refer to the documentation: https://atom-docs.appthreat.dev/", ); console.log( `evinse will automatically reuse any slices matching the name ${slicesFile}`, ); } else { console.log( `TIP: Try using a cdxgen container image optimized for ${language}. Refer to the documentation or ask cdxgenGPT for the image details.`, ); } } } else if ( DEBUG_MODE && sliceType === "usages" && !safeExistsSync(join(filePath, openapiSpecFile)) ) { console.log( `openapi spec file "${join(filePath, openapiSpecFile)}" was not generated successfully. Check if atom-tools pypi package is installed and available in PATH.`, ); } if ( ["scala"].includes(language) && sliceType === "usages" && !safeExistsSync(semanticsSlicesFile) ) { console.log( `Semantics slices file "${semanticsSlicesFile}" was not generated successfully. Try running atom cli in Java mode.`, ); } return { tempDir: sliceOutputDir, slicesFile, atomFile, openapiSpecFile: resolve(join(filePath, openapiSpecFile)), semanticsSlicesFile, }; } export function purlToLanguage(purl, filePath) { let language = undefined; const purlObj = PackageURL.fromString(purl); switch (purlObj.type) { case "maven": language = filePath?.endsWith(".jar") ? "jar" : "java"; break; case "npm": language = "javascript"; break; case "pypi": language = "python"; break; case "composer": language = "php"; break; case "gem": language = "ruby"; break; case "generic": language = "c"; } return language; } export function initFromSbom(components, language) { const purlLocationMap = {}; const purlImportsMap = {}; for (const comp of components) { if (!comp || !comp.evidence) { continue; } if (["php", "ruby"].includes(language)) { (comp.properties || []) .filter((v) => v.name === "Namespaces") .forEach((v) => { purlImportsMap[comp.purl] = (v.value || "").split(", "); }); } else { (comp.properties || []) .filter((v) => v.name === "ImportedModules") .forEach((v) => { purlImportsMap[comp.purl] = (v.value || "").split(","); }); } if (comp.evidence.occurrences) { purlLocationMap[comp.purl] = new Set( comp.evidence.occurrences.map((v) => v.location), ); } } return { purlLocationMap, purlImportsMap, }; } function usableSlicesFile(slicesFile) { if (!slicesFile || !safeExistsSync(slicesFile)) { return false; } const stats = fs.statSync(slicesFile); if (!stats.isFile()) { return false; } const fileSizeInBytes = stats.size; return fileSizeInBytes > 1024; } /** * Function to analyze the project * * @param {Object} dbObjMap DB and model instances * @param {Object} options Command line options */ export async function analyzeProject(dbObjMap, options) { const dirPath = options._[0] || "."; const languages = options.language; const language = Array.isArray(languages) ? languages[0] : languages; let usageSlice = undefined; let dataFlowSlice = undefined; let reachablesSlice = undefined; let semanticsSlice = undefined; let usagesSlicesFile = undefined; let dataFlowSlicesFile = undefined; let reachablesSlicesFile = undefined; let semanticsSlicesFile = undefined; let dataFlowFrames = {}; let servicesMap = {}; let retMap = {}; let userDefinedTypesMap = {}; const bomFile = options.input; const bomJson = JSON.parse(fs.readFileSync(bomFile, "utf8")); const components = bomJson.components || []; let cryptoComponents = []; let cryptoGeneratePurls = {}; let openapiSpecFile; // Load any existing purl-location information from the sbom. // For eg: cdxgen populates this information for javascript projects let { purlLocationMap, purlImportsMap } = initFromSbom(components, language); // Do reachables first so that usages slicing can reuse the atom file // We need reachables slicing even when trying to infer crypto packages if (options.withReachables || options.includeCrypto) { if ( options.reachablesSlicesFile && usableSlicesFile(options.reachablesSlicesFile) ) { reachablesSlicesFile = options.reachablesSlicesFile; reachablesSlice = JSON.parse( fs.readFileSync(options.reachablesSlicesFile, "utf-8"), ); } else { retMap = await createSlice(language, dirPath, "reachables", options); if (retMap?.slicesFile && safeExistsSync(retMap.slicesFile)) { reachablesSlicesFile = retMap.slicesFile; reachablesSlice = JSON.parse( fs.readFileSync(retMap.slicesFile, "utf-8"), ); } } } if (reachablesSlice && Object.keys(reachablesSlice).length) { const retMap = collectReachableFrames(language, reachablesSlice); dataFlowFrames = retMap.dataFlowFrames; cryptoComponents = retMap.cryptoComponents; cryptoGeneratePurls = retMap.cryptoGeneratePurls; } // Reuse existing usages slices if (options.usagesSlicesFile && usableSlicesFile(options.usagesSlicesFile)) { usageSlice = JSON.parse(fs.readFileSync(options.usagesSlicesFile, "utf-8")); usagesSlicesFile = options.usagesSlicesFile; } else { // Generate our own slices retMap = await createSlice(language, dirPath, "usages", options); if (retMap?.slicesFile && safeExistsSync(retMap.slicesFile)) { usageSlice = JSON.parse(fs.readFileSync(retMap.slicesFile, "utf-8")); usagesSlicesFile = retMap.slicesFile; } if (retMap?.semanticsSlicesFile) { options.semanticsSlicesFile = retMap.semanticsSlicesFile; semanticsSlicesFile = retMap.semanticsSlicesFile; if (DEBUG_MODE) { console.log( `Reusing the generated semantics slices file "${semanticsSlicesFile}".`, ); } } if (retMap.openapiSpecFile) { // Retain any generated openapi spec file openapiSpecFile = retMap.openapiSpecFile; } } // Support for semantics slicing if ( (PROJECT_TYPE_ALIASES.swift.includes(language) || PROJECT_TYPE_ALIASES.scala.includes(language)) && components.length ) { // Reuse existing semantics slices for swift and scala if ( options.semanticsSlicesFile && safeExistsSync(options.semanticsSlicesFile) ) { semanticsSlice = JSON.parse( fs.readFileSync(options.semanticsSlicesFile, "utf-8"), ); semanticsSlicesFile = options.semanticsSlicesFile; } else if (PROJECT_TYPE_ALIASES.swift.includes(language)) { // Generate our own slices for swift retMap = await createSlice(language, dirPath, "semantics", options); if (retMap?.slicesFile && safeExistsSync(retMap.slicesFile)) { semanticsSlice = JSON.parse( fs.readFileSync(retMap.slicesFile, "utf-8"), ); semanticsSlicesFile = retMap.slicesFile; } } } // Parse usage slices if (usageSlice && Object.keys(usageSlice).length) { const retMap = await parseObjectSlices( language, usageSlice, dbObjMap, servicesMap, purlLocationMap, purlImportsMap, openapiSpecFile, ); purlLocationMap = retMap.purlLocationMap; servicesMap = retMap.servicesMap; userDefinedTypesMap = retMap.userDefinedTypesMap; } // Parse the semantics slices for swift and scala if ( semanticsSlice && Object.keys(semanticsSlice).length && components.length ) { // Identify the purl locations const retMap = parseSemanticSlices(language, components, semanticsSlice); purlLocationMap = retMap.purlLocationMap; } if (options.withDataFlow) { if ( options.dataFlowSlicesFile && safeExistsSync(options.dataFlowSlicesFile) ) { dataFlowSlicesFile = options.dataFlowSlicesFile; dataFlowSlice = JSON.parse( fs.readFileSync(options.dataFlowSlicesFile, "utf-8"), ); } else if (!PROJECT_TYPE_ALIASES.scala.includes(language)) { retMap = await createSlice(language, dirPath, "data-flow", options); if (retMap?.slicesFile && safeExistsSync(retMap.slicesFile)) { dataFlowSlicesFile = retMap.slicesFile; dataFlowSlice = JSON.parse(fs.readFileSync(retMap.slicesFile, "utf-8")); } } } if (dataFlowSlice && Object.keys(dataFlowSlice).length) { dataFlowFrames = await collectDataFlowFrames( language, userDefinedTypesMap, dataFlowSlice, dbObjMap, purlLocationMap, purlImportsMap, ); } return { atomFile: retMap?.atomFile, usagesSlicesFile, dataFlowSlicesFile, reachablesSlicesFile, semanticsSlicesFile, purlLocationMap, servicesMap, dataFlowFrames, tempDir: retMap?.tempDir, userDefinedTypesMap, cryptoComponents, cryptoGeneratePurls, openapiSpecFile, }; } export async function parseObjectSlices( language, usageSlice, dbObjMap, servicesMap = {}, purlLocationMap = {}, purlImportsMap = {}, openapiSpecFile = undefined, ) { let openapiServicesMode = false; if (!usageSlice || !Object.keys(usageSlice).length) { return purlLocationMap; } const userDefinedTypesMap = {}; (usageSlice.userDefinedTypes || []).forEach((ut) => { userDefinedTypesMap[ut.name] = true; }); for (const slice of [ ...(usageSlice.objectSlices || []), ...(usageSlice.userDefinedTypes || []), ]) { // Skip the library code typically without filename if ( !slice.fileName || !slice.fileName.trim().length || slice.fileName === "<empty>" || slice.fileName === "<unknown>" ) { continue; } await parseSliceUsages( language, userDefinedTypesMap, slice, dbObjMap, purlLocationMap, purlImportsMap, ); // Prefer openapi for identifying services if ( !openapiServicesMode && openapiSpecFile && safeExistsSync(openapiSpecFile) ) { detectServicesFromOpenAPI(language, openapiSpecFile, servicesMap); if (servicesMap && Object.keys(servicesMap).length) { openapiServicesMode = true; } } // Only identify services from usage slices as a fallback if (!openapiServicesMode) { detectServicesFromUsages(language, slice, servicesMap); } } // Only identify services from user defined types as a second fallback if (!openapiServicesMode) { detectServicesFromUDT(language, usageSlice.userDefinedTypes, servicesMap); } return { purlLocationMap, servicesMap, userDefinedTypesMap, }; } /** * The implementation of this function is based on the logic proposed in the atom slices specification * https://github.com/AppThreat/atom/blob/main/specification/docs/slices.md#use * * @param {string} language Application language * @param {Object} userDefinedTypesMap User Defined types in the application * @param {Array} slice Usages array for each objectSlice * @param {Object} dbObjMap DB Models * @param {Object} purlLocationMap Object to track locations where purls are used * @param {Object} purlImportsMap Object to track package urls and their import aliases * @returns */ export async function parseSliceUsages( language, userDefinedTypesMap, slice, dbObjMap, purlLocationMap, purlImportsMap, ) { const fileName = slice.fileName; const typesToLookup = new Set(); const lKeyOverrides = {}; const usages = slice.usages || []; // What should be the line number to use. slice.lineNumber would be quite coarse and could lead to reports such as // #1670. Line numbers under targetObj and definedBy is a safe bet for dynamic languages, but occassionally leads to // confusion when inter-procedural tracking works better than expected. let sliceLineNumber; if (["java", "jar"].includes(language)) { sliceLineNumber = slice.lineNumber; } // Annotations from usages if (slice.signature?.startsWith("@") && !usages.length) { typesToLookup.add(slice.fullName); addToOverrides(lKeyOverrides, slice.fullName, fileName, slice.lineNumber); } // PHP imports from usages if (slice.code?.startsWith("use") && !usages.length) { typesToLookup.add(slice.fullName); addToOverrides(lKeyOverrides, slice.fullName, fileName, slice.lineNumber); } for (const ausage of usages) { const ausageLine = sliceLineNumber || ausage?.targetObj?.lineNumber || ausage?.definedBy?.lineNumber; // First capture the types in the targetObj and definedBy for (const atype of [ [ausage?.targetObj?.isExternal, ausage?.targetObj?.typeFullName], [ausage?.targetObj?.isExternal, ausage?.targetObj?.resolvedMethod], [ausage?.definedBy?.name?.includes("::"), ausage?.definedBy?.name], [ausage?.definedBy?.isExternal, ausage?.definedBy?.typeFullName], [ausage?.definedBy?.isExternal, ausage?.definedBy?.resolvedMethod], ...(ausage?.fields || []).map((f) => [f?.isExternal, f?.typeFullName]), ]) { if ( !atype[0] && (!atype[1] || ["ANY", "(...)", "<empty>"].includes(atype[1])) ) { continue; } if ( atype[0] !== false && !isFilterableType(language, userDefinedTypesMap, atype[1]) ) { if (!atype[1].includes("(") && !atype[1].includes(".py")) { typesToLookup.add(simplifyType(atype[1])); // Javascript and Ruby calls can be resolved to a precise line number only from the call nodes if ( ["javascript", "js", "ts", "typescript", "ruby"].includes( language, ) && ausageLine ) { if (atype[1].includes(":")) { typesToLookup.add( simplifyType(atype[1].split("::")[0].replace(/:/g, "/")), ); } addToOverrides(lKeyOverrides, atype[1], fileName, ausageLine); } } const maybeClassType = getClassTypeFromSignature(language, atype[1]); typesToLookup.add(maybeClassType); if (ausageLine) { addToOverrides(lKeyOverrides, maybeClassType, fileName, ausageLine); } } } // Now capture full method signatures from invokedCalls, argToCalls including the paramtypes for (const acall of [] .concat(ausage?.invokedCalls || []) .concat(ausage?.argToCalls || []) .concat(ausage?.procedures || [])) { if ( acall.resolvedMethod?.startsWith("@") || acall?.callName?.includes("::") ) { typesToLookup.add(acall.callName); if (acall.lineNumber) { addToOverrides( lKeyOverrides, acall.callName, fileName, acall.lineNumber, ); } } else if (acall.isExternal === false) { continue; } if ( !isFilterableType(language, userDefinedTypesMap, acall?.resolvedMethod) ) { if ( !acall?.resolvedMethod.includes("(") && !acall?.resolvedMethod.includes(".py") ) { typesToLookup.add(simplifyType(acall?.resolvedMethod)); // Javascript calls can be resolved to a precise line number only from the call nodes if (acall.lineNumber) { addToOverrides( lKeyOverrides, acall?.resolvedMethod, fileName, acall.lineNumber, ); } } const maybeClassType = getClassTypeFromSignature( language, acall?.resolvedMethod, ); typesToLookup.add(maybeClassType); if (acall.lineNumber) { addToOverrides( lKeyOverrides, maybeClassType, fileName, acall.lineNumber, ); } } for (const aparamType of acall?.paramTypes || []) { if (!isFilterableType(language, userDefinedTypesMap, aparamType)) { if (!aparamType.includes("(") && !aparamType.includes(".py")) { typesToLookup.add(simplifyType(aparamType)); if (acall.lineNumber) { if (aparamType.includes(":")) { typesToLookup.add( simplifyType(aparamType.split("::")[0].replace(/:/g, "/")), ); } addToOverrides( lKeyOverrides, aparamType, fileName, acall.lineNumber, ); } } const maybeClassType = getClassTypeFromSignature( language, aparamType, ); typesToLookup.add(maybeClassType); if (acall.lineNumber) { addToOverrides( lKeyOverrides, maybeClassType, fileName, acall.lineNumber, ); } } } } } for (const atype of typesToLookup) { if (isFilterableType(language, userDefinedTypesMap, atype)) { continue; } if (purlImportsMap && Object.keys(purlImportsMap).length) { for (const apurl of Object.keys(purlImportsMap)) { const apurlImports = purlImportsMap[apurl]; if (["php", "python", "ruby"].includes(language)) { for (const aimp of apurlImports) { if ( atype.startsWith(aimp) || (language === "ruby" && aimp.startsWith(atype)) ) { if (!purlLocationMap[apurl]) { purlLocationMap[apurl] = new Set(); } if (lKeyOverrides[atype]) { purlLocationMap[apurl].add(...lKeyOverrides[atype]); } } } } else { if (apurlImports?.includes(atype)) { if (!purlLocationMap[apurl]) { purlLocationMap[apurl] = new Set(); } if (lKeyOverrides[atype]) { purlLocationMap[apurl].add(...lKeyOverrides[atype]); } } } } } else { // Check the namespaces db let nsHits = typePurlsCache[atype]; if (!nsHits && ["java", "jar"].includes(language)) { nsHits = await dbObjMap.Namespaces.findAll({ attributes: ["purl"], where: { data: { [Op.like]: `%${atype}%`, }, }, }); } if (nsHits?.length) { for (const ns of nsHits) { if (!purlLocationMap[ns.purl]) { purlLocationMap[ns.purl] = new Set(); } if (lKeyOverrides[atype]) { purlLocationMap[ns.purl].add(...lKeyOverrides[atype]); } } typePurlsCache[atype] = nsHits; } else { // Avoid persistent lookups typePurlsCache[atype] = []; } } } } /** * Method to parse semantic slice data. Currently supported for swift and scala languages. * * @param {String} language Project language. * @param {Array} components Components from the input SBOM * @param {Object} semanticsSlice Semantic slice data * @returns {Object} Parsed metadata */ export function parseSemanticSlices(language, components, semanticsSlice) { // For scala, use the dedicated scalasem module. if (language === "scala") { return findPurlLocations(components, semanticsSlice); } const componentNamePurlMap = {}; const componentSymbolsMap = {}; const allObfuscationsMap = {}; // We have two attributes in the semantics slice to expand a given module to its constituent symbols // - A less precise buildSymbols, which is obtained by parsing the various output-file-map.json files // - A granular and precise moduleInfos, which has the exact classes, protocols, enums etc belonging to each module // The objective then is to build the purlLocationMap, which is the list of locations where a given purl is used // For this, we have two attributes: // - A simpler fileStructures attribute that has the mapping between a swift file and the list of referenced types // - A granular fileIndexes attribute that contains information about the clang modules and line number information // We first need to map out the component names to their purls // This is because the semantics slice use the module names everywhere for (const comp of components) { componentNamePurlMap[comp.name] = comp.purl; if (!componentSymbolsMap[comp.name]) { componentSymbolsMap[comp.name] = new Set(); } if (semanticsSlice?.buildSymbols[comp.name]) { for (const asym of semanticsSlice.buildSymbols[comp.name]) { componentSymbolsMap[comp.name].add(asym); } } const moduleInfo = semanticsSlice?.moduleInfos[comp.name] || {}; if (moduleInfo.classes) { for (const asym of moduleInfo.classes) { componentSymbolsMap[comp.name].add(asym); } } if (moduleInfo.protocols) { for (const asym of moduleInfo.protocols) { componentSymbolsMap[comp.name].add(asym); } } if (moduleInfo.enums) { for (const asym of moduleInfo.enums) { componentSymbolsMap[comp.name].add(asym); } } // Now collect the method signatures from class and protocol methods if (moduleInfo.classMethods) { for (const aclassName of Object.keys(moduleInfo.classMethods)) { for (const asym of moduleInfo.classMethods[aclassName]) { componentSymbolsMap[comp.name].add(asym); } } } if (moduleInfo.protocolMethods) { for (const aclassName of Object.keys(moduleInfo.protocolMethods)) { for (const asym of moduleInfo.protocolMethods[aclassName]) { componentSymbolsMap[comp.name].add(asym); } } } // Build a large obfuscation map if (moduleInfo.obfuscationMap) { for (const asym of Object.keys(moduleInfo.obfuscationMap)) { allObfuscationsMap[asym] = moduleInfo.obfuscationMap[asym]; } } } const purlLocationsSet = {}; // We now have a good set of data in componentSymbolsMap and allObfuscationsMap // We can iterate these symbols and check if they exist under fileIndexes.symbolLocations for (const compName of Object.keys(componentSymbolsMap)) { const compSymbols = Array.from(componentSymbolsMap[compName]); const searchHits = searchSymbolLocations( compSymbols, semanticsSlice?.fileIndexes, ); // We have an occurrence hit. Let's populate the purlLocationMap if (searchHits?.length) { const locations = purlLocationsSet[componentNamePurlMap[compName]] || new Set(); for (const ahit of searchHits) { for (const aline of ahit.lineNumbers) { locations.add(`${ahit.file}#${aline}`); } } purlLocationsSet[componentNamePurlMap[compName]] = locations; } } const purlLocationMap = {}; for (const apurl of Object.keys(purlLocationsSet)) { purlLocationMap[apurl] = Array.from(purlLocationsSet[apurl]).sort(); } return { purlLocationMap }; } function searchSymbolLocations(compSymbols, fileIndexes) { const searchHits = []; if (!fileIndexes) { return undefined; } for (const aswiftFile of Object.keys(fileIndexes)) { if (!fileIndexes[aswiftFile].symbolLocations) { continue; } for (const asym of Object.keys(fileIndexes[aswiftFile].symbolLocations)) { const lineNumbers = fileIndexes[aswiftFile].symbolLocations[asym]; if (compSymbols.includes(asym)) { searchHits.push({ file: aswiftFile, symbol: asym, lineNumbers, }); } } } return searchHits; } export function isFilterableType(language, userDefinedTypesMap, typeFullName) { if ( !typeFullName || ["ANY", "UNKNOWN", "VOID", "IMPORT"].includes(typeFullName.toUpperCase()) ) { return true; } for (const ab of [ "<operator", "<unresolved", "<unknownFullName", "__builtin", "LAMBDA", "../", ]) { if (typeFullName.startsWith(ab)) { return true; } } if (language && ["java", "jar"].includes(language)) { if ( !typeFullName.includes(".") || typeFullName.startsWith("@") || typeFullName.startsWith("java.") || typeFullName.startsWith("sun.") || typeFullName.startsWith("jdk.") || typeFullName.startsWith("org.w3c.") || typeFullName.startsWith("org.xml.") || typeFullName.startsWith("javax.xml.") ) { return true; } } if (["javascript", "js", "ts", "typescript"].includes(language)) { if ( typeFullName.includes(".js") || typeFullName.includes("=>") || typeFullName.startsWith("__") || typeFullName.startsWith("{ ") || typeFullName.startsWith("JSON") || typeFullName.startsWith("void:") || typeFullName.startsWith("node:") ) { return true; } } if (["python", "py"].includes(language)) { if ( typeFullName.startsWith("tmp") || typeFullName.startsWith("self.") || typeFullName.startsWith("_") || typeFullName.startsWith("def ") ) { return true; } } if (["php"].includes(language)) { if (!typeFullName.includes("\\") && !typeFullName.startsWith("use")) { return true; } } if (["ruby"].includes(language)) { if ( !typeFullName || ["<empty>"].includes(typeFullName) || typeFullName.startsWith("__core.") || typeFullName.startsWith("@") || typeFullName.toLowerCase() === typeFullName ) { return true; } } return !!userDefinedTypesMap[typeFullName]; } export function detectServicesFromOpenAPI( _language, openapiSpecFile, servicesMap, ) { try { const specData = JSON.parse( fs.readFileSync(openapiSpecFile, { encoding: "utf-8" }), ); if (!specData?.paths || !Object.keys(specData.paths).length) { return; } for (const aurlPattern of Object.keys(specData.paths)) { const httpMethodObj = specData.paths[aurlPattern]; for (const httpMethod of Object.keys(httpMethodObj)) { const hobj = httpMethodObj[httpMethod]; const serviceName = `service-${aurlPattern.replaceAll("/", "")}-${httpMethod}`; const operationId = hobj["operationId"]; const properties = [ { name: "cdx:service:httpMethod", value: httpMethod }, ]; if (operationId) { properties.push({ name: "internal:operationId", value: operationId }); } servicesMap[serviceName] = { endpoints: new Set([aurlPattern]), authenticated: undefined, xTrustBoundary: undefined, properties, }; } } } catch (e) { return; } } /** * Method to detect services from annotation objects in the usage slice * * @param {string} language Application language * @param {Array} slice Usages array for each objectSlice * @param {Object} servicesMap Existing service map */ export function detectServicesFromUsages(language, slice, servicesMap = {}) { const usages = slice.usages; if (!usages) { return []; } for (const usage of usages) { const targetObj = usage?.targetObj; const definedBy = usage?.definedBy; let endpoints = []; let authenticated = undefined; if (language === "ruby" && definedBy?.name?.includes("/")) { endpoints = extractEndpoints(language, definedBy.name); } else if (targetObj?.resolvedMethod) { if (language !== "php") { endpoints = extractEndpoints(language, targetObj?.resolvedMethod); } if (targetObj?.resolvedMethod.toLowerCase().includes("auth")) { authenticated = true; } } else if (definedBy?.resolvedMethod) { if (language !== "php") { endpoints = extractEndpoints(language, definedBy?.resolvedMethod); } if (definedBy?.resolvedMethod.toLowerCase().includes("auth")) { authenticated = true; } } if (usage.invokedCalls) { for (const acall of usage.invokedCalls) { if (acall.resolvedMethod) { if (language !== "php") { const tmpEndpoints = extractEndpoints( language, acall.resolvedMethod, ); if (acall.resolvedMethod.toLowerCase().includes("auth")) { authenticated = true; } if (tmpEndpoints?.length) { endpoints = (endpoints || []).concat(tmpEndpoints); } } } } } if (endpoints?.length) { const serviceName = constructServiceName(language, slice); if (!servicesMap[serviceName]) { servicesMap[serviceName] = { endpoints: new Set(), authenticated, xTrustBoundary: authenticated === true ? true : undefined, }; } for (const endpoint of endpoints) { servicesMap[serviceName].endpoints.add(endpoint); } } } } /** * Method to detect services from user defined types in the usage slice * * @param {string} language Application language * @param {Array} userDefinedTypes User defined types * @param {Object} servicesMap Existing service map */ export function detectServicesFromUDT(language, userDefinedTypes, servicesMap) { if ( ["python", "py", "c", "cpp", "c++", "php", "ruby"].includes(language) && userDefinedTypes && userDefinedTypes.length ) { for (const audt of userDefinedTypes) { if ( audt.name.toLowerCase().includes("route") || audt.name.toLowerCase().includes("path") || audt.name.toLowerCase().includes("url") || audt.name.toLowerCase().includes("registerhandler") || audt.name.toLowerCase().includes("endpoint") || audt.name.toLowerCase().includes("api") || audt.name.toLowerCase().includes("add_method") || audt.name.toLowerCase().includes("get") || audt.name.toLowerCase().includes("post") || audt.name.toLowerCase().includes("delete") || audt.name.toLowerCase().includes("put") || audt.name.toLowerCase().includes("head") || audt.name.toLowerCase().includes("options") || audt.name.toLowerCase().includes("addRoute") || audt.name.toLowerCase().includes("connect") ) { const fields = audt.fields || []; if ( fields.length && fields[0] && fields[0].name && fields[0].name.length > 1 ) { const endpoints = extractEndpoints(language, fields[0].name); let serviceName = "service"; if (audt.fileName) { serviceName = `${path.basename( audt.fileName.replace(".py", ""), )}-service`; } if (endpoints?.length) { if (!servicesMap[serviceName]) { servicesMap[serviceName] = { endpoints: new Set(), authenticated: false, xTrustBoundary: undefined, }; } for (const endpoint of endpoints) { servicesMap[serviceName].endpoints.add(endpoint); } } } } } } } export function constructServiceName(_language, slice) { let serviceName = "service"; if (slice?.fullName) { serviceName = slice.fullName.split(":")[0].replace(/\./g, "-"); } else if (slice?.fileName) { serviceName = path.basename(slice.fileName).split(".")[0]; } if (!serviceName.endsWith("service")) { serviceName = `${serviceName}-service`; } return serviceName; } export function extractEndpoints(language, code) { if (!code) { return undefined; } let endpoints = undefined; switch (language) { case "java": case "jar": if ( code.startsWith("@") && (code.includes("Mapping") || code.includes("Path")) && code.includes("(") ) { const matches = code.match(/['"](.*?)['"]/gi) || []; endpoints = matches .map((v) => v.replace(/["']/g, "")) .filter( (v) => v.length && !v.startsWith(".") && v.includes("/") && !v.startsWith("@"), ); } break; case "js": case "ts": case "javascript": case "typescript": if (code.includes("app.") || code.includes("route")) { const matches = code.match(/['"](.*?)['"]/gi) || []; endpoints = matches .map((v) => v.replace(/["']/g, "")) .filter( (v) => v.length && !v.startsWith(".") && v.includes("/") && !v.startsWith("@") && !v.startsWith("application/") && !v.startsWith("text/"), ); } break; case "ruby": case "rb": { let urlPrefix = ""; let urlSuffix = ""; // Remove the ellipsis added by the frontend code = code.replaceAll("...", ""); if (code.includes("namespace ")) { urlPrefix = code.split("namespace ").pop().split(" ")[0]; } else if (code.includes("collection do get ")) { urlPrefix = code.split("collection do get ").pop().split(" ")[0]; } for (const m of ["get", "post", "delete", "options", "put", "head"]) { if (code.includes(`${m} `)) { urlSuffix = code.split(`${m} `).pop().split(" ")[0]; } } if (code.includes("http") && code.includes('"')) { endpoints = code.split('"').filter((s) => s.startsWith("http")); } if (urlPrefix !== "" || urlSuffix !== "") { if (!endpoints) { endpoints = []; } endpoints.push( `${urlPrefix.replace(/['"]/g, "")}${urlSuffix.replace(/['"]/g, "")}`, ); } endpoints = endpoints && Array.isArray(endpoints) ? endpoints.filter( (u) => u.length > 1 && !u.startsWith(".") && u !== "https:/", ) : endpoints; break; } default: endpoints = (code.match(/['"](.*?)['"]/gi) || []) .map((v) => v.replace(/["']/g, "").replace("\n", "")) .filter((v) => v.length > 2 && v.includes("/")); break; } return endpoints; } /** * Method to create the SBOM with evidence file called evinse file. * * @param {Object} sliceArtefacts Various artefacts from the slice operation * @param {Object} options Command line options * @returns */ export function createEvinseFile(sliceArtefacts, options) { const { tempDir, usagesSlicesFile, dataFlowSlicesFile, reachablesSlicesFile, purlLocationMap, servicesMap, dataFlowFrames, cryptoComponents, cryptoGeneratePurls, } = sliceArtefacts; const bomFile = options.input; const evinseOutFile = options.output; const bomJson = JSON.parse(fs.readFileSync(bomFile, "utf8")); const components = bomJson.components || []; // Clear existing annotations bomJson.annotations = []; let occEvidencePresent = false; let csEvidencePresent = false; let servicesPresent = false; for (const comp of components) { if (!comp.purl) { continue; } delete comp.signature; const locationOccurrences = Array.from( purlLocationMap[comp.purl] || [], ).sort(); if (locationOccurrences.length) { if (!comp.evidence) { comp.evidence = {}; } // This step would replace any existing occurrences // This is fine as long as the input sbom was also generated by cdxgen comp.evidence.occurrences = locationOccurrences .filter((l) => !!l) .map((l) => ({ location: l, })); occEvidencePresent = true; } const dfFrames = dataFlowFrames[comp.purl]; if (dfFrames?.length) { if (!comp.evidence) { comp.evidence = {}; } if (!comp.evidence.callstack) { comp.evidence.callstack = {}; } if (!comp.evidence.callstack.frames) { comp.evidence.callstack.frames = framePicker(dfFrames); csEvidencePresent = true; } } // Add crypto tags if this purl offers any generation algorithm if ( cryptoGeneratePurls?.[comp.purl] && Array.from(cryptoGeneratePurls[comp.purl]).length ) { comp.tags = ["crypto", "crypto-generate"]; } } // for if (servicesMap && Object.keys(servicesMap).length) { const services = []; for (const serviceName of Object.keys(servicesMap)) { services.push({ name: serviceName, endpoints: Array.from(servicesMap[serviceName].endpoints), authenticated: servicesMap[serviceName].authenticated, "x-trust-boundary": servicesMap[serviceName].xTrustBoundary, properties: servicesMap[serviceName].properties, }); } // Add to existing services bomJson.services = (bomJson.services || []).concat(services); servicesPresent = true; } // Add the crypto components to the components list if (cryptoComponents?.length) { bomJson.components = bomJson.components.concat(cryptoComponents); } // Fix the dependencies section with provides information if ( cryptoGeneratePurls && Object.keys(cryptoGeneratePurls).length && bomJson.dependencies ) { const newDependencies = []; for (const depObj of bomJson.dependencies) { if (depObj.ref && cryptoGeneratePurls[depObj.ref]) { const providedAlgos = Array.from(cryptoGeneratePurls[depObj.ref]); if (providedAlgos.length) { depObj.provides = providedAlgos; } } newDependencies.push(depObj); } bomJson.dependencies = newDependencies; } if (options.annotate) { if (usagesSlicesFile && safeExistsSync(usagesSlicesFile)) { bomJson.annotations.push({ subjects: [bomJson.serialNumber], annotator: { component: bomJson.metadata.tools.components[0] }, timestamp: getTimestamp(), text: fs.readFileSync(usagesSlicesFile, "utf8"), }); } if (dataFlowSlicesFile && safeExistsSync(dataFlowSlicesFile)) { bomJson.annotations.push({ subjects: [bomJson.serialNumber], annotator: { component: bomJson.metadata.tools.components[0] }, timestamp: getTimestamp(), text: fs.readFileSync(dataFlowSlicesFile, "utf8"), }); } if (reachablesSlicesFile && safeExistsSync(reachablesSlicesFile)) { bomJson.annotations.push({ subjects: [bomJson.serialNumber], annotator: { component: bomJson.metadata.tools.components[0] }, timestamp: getTimestamp(), text: fs.readFileSync(reachablesSlicesFile, "utf8"), }); } } // Increment the version bomJson.version = (bomJson.version || 1) + 1; // Set the current timestamp to indicate this is newer bomJson.metadata.timestamp = getTimestamp(); delete bomJson.signature; // Redo post-processing with evinse data const bomNSData = postProcess({ bomJson }, options); fs.writeFileSync( evinseOutFile, JSON.stringify(bomNSData.bomJson, null, options.jsonPretty ? 2 : null), ); if (occEvidencePresent || csEvidencePresent || servicesPresent) { console.log(evinseOutFile, "created successfully."); } else { console.log( "Unable to identify component evidence for the input SBOM based on the slices from atom. The slice