/*
 * @cyclonedx/cdxgen
 * Creates CycloneDX Software Bill of Materials (SBOM) from source or container image.
 *
 * Package-viewer listing metadata (version not captured): 323 lines (312 loc), 9.73 kB, JavaScript.
 */
import { readFileSync } from "node:fs";
import { basename, extname } from "node:path";
import {
credentialIndicatorsForText,
isLocalHost,
providerNamesForText,
sanitizeMcpRefToken,
} from "./mcpDiscovery.js";
import { sanitizeBomUrl } from "./propertySanitizer.js";
import { scanTextForHiddenUnicode } from "./unicodeScan.js";
// Glob-style patterns naming the agent instruction, prompt and skill files
// this parser handles. Exposed unchanged as `agentFormulationParser.patterns`;
// the actual file matching happens outside this module.
// NOTE(review): both "AGENTS.md" and "agents.md" are listed, presumably because
// the consumer matches case-sensitively — confirm against the glob runner.
const AGENT_FILE_PATTERNS = [
"AGENTS.md",
"agents.md",
"CLAUDE.md",
"Cursor.md",
".github/copilot-instructions.md",
".github/instructions/**/*.{md,mdx,markdown,txt}",
".github/workflows/copilot-setup-steps.yml",
"**/*.{prompt,mdc}",
];
// Matches scoped package references (`@scope/name` form) that look MCP
// related: anything under `@modelcontextprotocol/`, or any scope whose package
// name starts with `mcp`. Flags: global, case-insensitive, unicode.
// The trailing character class accepts `.` and `/`, so a match can include a
// sentence-ending period or a sub-path after the package name.
const MCP_PACKAGE_REF_PATTERN =
/@modelcontextprotocol\/[a-z0-9._/-]+|@[a-z0-9._-]+\/mcp[a-z0-9._/-]*/giu;
// Matches http:// and https:// URLs, running until whitespace or one of
// < > " ' ) ] }. Backticks and sentence punctuation (. , ; :) are NOT
// terminators, so a match may carry such characters at its end.
const URL_PATTERN = /https?:\/\/[^\s<>"')\]}]+/giu;
// Ordered [hintName, regex] pairs used by authHintsForText: a hint name is
// reported when its regex matches anywhere in the text. None of the regexes
// carries the `g` flag, so repeated `.test()` calls keep no `lastIndex` state.
// Note that `authorization_endpoint`, `token_endpoint`, `issuer` and
// `authorization:` are matched as plain substrings (no word boundaries).
const AUTH_HINT_PATTERNS = [
["bearer", /\bbearer\b/i],
["oauth", /\boauth\b|authorization_endpoint|token_endpoint|issuer/i],
["api-key", /\bapi[_ -]?key\b/i],
["token", /\btoken\b|authorization:/i],
];
// Case-insensitive hostname-suffix regexes for tunnelling services (ngrok,
// trycloudflare, localhost.run, serveo). Each pattern starts with a literal
// dot and is anchored at the end, so only subdomains match — the bare apex
// domain (e.g. "ngrok.io") does not.
const TUNNEL_HOST_PATTERNS = [
/\.ngrok(?:-free)?\.app$/i,
/\.ngrok\.io$/i,
/\.trycloudflare\.com$/i,
/\.localhost\.run$/i,
/\.serveo\.net$/i,
];
/**
 * Pick the syntax label handed to the hidden-Unicode scanner for a file.
 *
 * The decision is based solely on the lower-cased file extension.
 *
 * @param {string} filePath Path of the file being scanned.
 * @returns {"markdown"|"yaml"|"text"} "markdown" for .md/.mdx/.markdown,
 *   "yaml" for .yaml/.yml, and "text" for everything else.
 */
function syntaxForFile(filePath) {
  switch (extname(filePath).toLowerCase()) {
    case ".md":
    case ".mdx":
    case ".markdown":
      return "markdown";
    case ".yaml":
    case ".yml":
      return "yaml";
    default:
      return "text";
  }
}
/**
 * Classify an agent-related file for the `cdx:file:kind` property.
 *
 * The lower-cased path is compared against an ordered list of suffixes; the
 * first rule with a matching suffix wins.
 *
 * @param {string} filePath Path of the matched file.
 * @returns {string} One of "copilot-setup-workflow", "copilot-instructions",
 *   "agent-instructions", "skill-file", or the fallback "ai-agent-file".
 */
function kindForFile(filePath) {
  const normalizedPath = filePath.toLowerCase();
  // Order matters: rules are evaluated top to bottom.
  const suffixRules = [
    [["copilot-setup-steps.yml"], "copilot-setup-workflow"],
    [["copilot-instructions.md"], "copilot-instructions"],
    [["agents.md", "claude.md"], "agent-instructions"],
    [[".prompt", ".mdc"], "skill-file"],
  ];
  for (const [suffixes, kind] of suffixRules) {
    if (suffixes.some((suffix) => normalizedPath.endsWith(suffix))) {
      return kind;
    }
  }
  return "ai-agent-file";
}
/**
 * List the authentication hints mentioned in a text.
 *
 * @param {string} text File content to inspect.
 * @returns {string[]} Names from AUTH_HINT_PATTERNS whose regex matches the
 *   text, in table order; empty when nothing matches.
 */
function authHintsForText(text) {
  const hints = [];
  for (const [hintName, matcher] of AUTH_HINT_PATTERNS) {
    if (matcher.test(text)) {
      hints.push(hintName);
    }
  }
  return hints;
}
/**
 * Collect the distinct MCP-looking scoped package references in a text.
 *
 * MCP_PACKAGE_REF_PATTERN accepts `.` and `/` inside a reference, so a
 * reference that ends a sentence or a path ("... @scope/mcp-tools.") is matched
 * together with that trailing character. Those trailing `.` / `/` characters
 * are trimmed here so the same package is not reported twice under different
 * spellings. A match that no longer contains a package name after trimming is
 * dropped.
 *
 * @param {string} text Untrusted file content to scan.
 * @returns {string[]} De-duplicated references, sorted with the default
 *   (code-unit) ordering; empty when nothing matches.
 */
function packageRefsForText(text) {
  const refs = new Set();
  for (const match of text.match(MCP_PACKAGE_REF_PATTERN) || []) {
    // Trim with an index walk rather than a `/[./]+$/` regex: the input is
    // untrusted and the regex form can degrade quadratically on long runs.
    let end = match.length;
    while (end > 0 && (match[end - 1] === "." || match[end - 1] === "/")) {
      end -= 1;
    }
    const ref = match.slice(0, end);
    // After trimming, a "/" can only remain if a package name follows it.
    if (ref.includes("/")) {
      refs.add(ref);
    }
  }
  return [...refs].sort();
}
/**
 * Extract the distinct MCP-looking HTTP(S) URLs mentioned in a text.
 *
 * A URL qualifies when its path contains "/mcp" or its hostname contains
 * "modelcontextprotocol" (both checked case-insensitively).
 *
 * URL_PATTERN does not stop at backticks or sentence punctuation, so a URL
 * written as `https://host/mcp` in Markdown, or one that ends a sentence, is
 * matched together with the trailing character (a backtick would otherwise be
 * percent-encoded into the path). Such trailing characters are removed before
 * the URL is parsed.
 *
 * @param {string} text Untrusted file content to scan.
 * @returns {string[]} Qualifying URLs as serialized by `URL#toString()`,
 *   de-duplicated and sorted with the default (code-unit) ordering.
 */
function mcpUrlsForText(text) {
  // Characters that commonly follow a URL in prose or Markdown.
  const trailingNoise = ".,;:!?`*";
  const urls = new Set();
  for (const match of text.match(URL_PATTERN) || []) {
    // Index walk instead of a trailing-anchored regex: linear on untrusted input.
    let end = match.length;
    while (end > 0 && trailingNoise.includes(match[end - 1])) {
      end -= 1;
    }
    let parsed;
    try {
      parsed = new URL(match.slice(0, end));
    } catch {
      // Ignore malformed URLs in untrusted agent instructions.
      continue;
    }
    if (
      parsed.pathname.toLowerCase().includes("/mcp") ||
      parsed.hostname.toLowerCase().includes("modelcontextprotocol")
    ) {
      urls.add(parsed.toString());
    }
  }
  return [...urls].sort();
}
/**
 * Turn the MCP URLs found in one agent file into inferred service entries.
 *
 * Each URL yields one service named after its lower-cased hostname (falling
 * back to the file's base name when the hostname is empty) and tagged as
 * "local-only" or "networked-public" according to `isLocalHost`.
 *
 * NOTE(review): the numeric suffix of the bom-ref is the 1-based position of
 * the URL within this call, so numbering restarts for every file.
 *
 * @param {string} filePath Source file the URLs were found in.
 * @param {string[]} mcpUrls Parseable absolute URLs.
 * @param {string[]} authHints Auth hint names; added as `cdx:mcp:authMode` when non-empty.
 * @param {string[]} providerNames Provider names; added as `cdx:mcp:providerNames` when non-empty.
 * @returns {object[]} One service object per URL, in input order.
 */
function buildInferredMcpServices(filePath, mcpUrls, authHints, providerNames) {
  const inferredServices = [];
  mcpUrls.forEach((endpointUrl, position) => {
    const host = new URL(endpointUrl).hostname.toLowerCase();
    const serviceProperties = [
      { name: "SrcFile", value: filePath },
      { name: "cdx:mcp:serviceType", value: "inferred-endpoint" },
      { name: "cdx:mcp:inventorySource", value: "agent-file" },
      { name: "cdx:mcp:usageConfidence", value: "medium" },
      { name: "cdx:mcp:reviewNeeded", value: "true" },
      {
        name: "cdx:mcp:exposureType",
        value: isLocalHost(host) ? "local-only" : "networked-public",
      },
      { name: "cdx:mcp:agentReference", value: "true" },
    ];
    // Optional comma-joined properties, emitted only for non-empty lists.
    const optionalLists = [
      ["cdx:mcp:providerNames", providerNames],
      ["cdx:mcp:authMode", authHints],
    ];
    for (const [propertyName, values] of optionalLists) {
      if (values.length) {
        serviceProperties.push({ name: propertyName, value: values.join(",") });
      }
    }
    const refToken = sanitizeMcpRefToken(host || basename(filePath));
    inferredServices.push({
      "bom-ref": `urn:service:agent-mcp:${refToken}:${position + 1}`,
      group: "mcp",
      name: host || `${basename(filePath)}-mcp-endpoint`,
      endpoints: [sanitizeBomUrl(endpointUrl)],
      properties: serviceProperties,
      version: "inferred",
    });
  });
  return inferredServices;
}
/**
 * Discover AI agent instruction and skill files that can hide MCP/runtime
 * surfaces from package-only inventory.
 *
 * `parse` reads every given file and emits one `file` component per readable,
 * non-blank file, plus one inferred MCP service per MCP-looking URL found in
 * that file.
 */
export const agentFormulationParser = {
  id: "agent-formulation",
  patterns: AGENT_FILE_PATTERNS,
  /**
   * Build components and inferred services for the matched agent files.
   *
   * @param {string[]} [files] Paths of the files to inspect; a nullish value
   *   is treated as an empty list.
   * @param {object} [_options] Accepted for interface compatibility; unused.
   * @returns {{components: object[], services: object[]}} Components (one per
   *   inventoried file, bom-ref `file:<path>`) and inferred MCP services.
   */
  parse(files, _options = {}) {
    const components = [];
    const services = [];
    for (const filePath of files || []) {
      let raw;
      try {
        raw = readFileSync(filePath, "utf-8");
      } catch {
        // Best-effort inventory: a file that cannot be read is skipped.
        continue;
      }
      // Blank files are skipped before any scanner runs. Every other matched
      // agent/instruction file is inventoried, even when it does not yet
      // contain hidden Unicode or explicit MCP references, so shipped files
      // such as CLAUDE.md and AGENTS.md still surface in build/post-build BOMs.
      if (!raw.trim().length) {
        continue;
      }
      const hiddenUnicodeScan = scanTextForHiddenUnicode(raw, {
        syntax: syntaxForFile(filePath),
      });
      const packageRefs = packageRefsForText(raw);
      const providerNames = providerNamesForText(raw);
      const mcpUrls = mcpUrlsForText(raw);
      const authHints = authHintsForText(raw);
      const credentialIndicators = credentialIndicatorsForText(raw);
      const mcpHosts = mcpUrls.map((urlValue) => new URL(urlValue).hostname);
      const hasPublicMcpEndpoint = mcpHosts.some((host) => !isLocalHost(host));
      const hasTunnelReference = mcpHosts.some((host) =>
        TUNNEL_HOST_PATTERNS.some((pattern) => pattern.test(host)),
      );
      const hasMcpReferences =
        mcpUrls.length > 0 ||
        packageRefs.length > 0 ||
        /\bmcp\b/i.test(raw) ||
        /modelcontextprotocol/i.test(raw);
      const hiddenComponentKinds = [
        [mcpUrls, "mcp-endpoint"],
        [providerNames, "provider"],
        [packageRefs, "mcp-package-reference"],
      ].flatMap(([values, kind]) => (values.length ? [kind] : []));

      const properties = [
        { name: "SrcFile", value: filePath },
        { name: "cdx:file:kind", value: kindForFile(filePath) },
        { name: "cdx:agent:inventorySource", value: "agent-file" },
        { name: "cdx:agent:hasMcpReferences", value: String(hasMcpReferences) },
        {
          name: "cdx:agent:hiddenEndpointCount",
          value: String(mcpUrls.length),
        },
      ];
      // Local helpers keep the conditional property list below readable; the
      // order of the calls defines the order of the emitted properties.
      const addFlag = (name) => {
        properties.push({ name, value: "true" });
      };
      const addList = (name, values) => {
        properties.push({ name, value: values.join(",") });
      };

      if (hiddenUnicodeScan.hasHiddenUnicode) {
        addFlag("cdx:file:hasHiddenUnicode");
        addList(
          "cdx:file:hiddenUnicodeCodePoints",
          hiddenUnicodeScan.codePoints,
        );
        addList(
          "cdx:file:hiddenUnicodeLineNumbers",
          hiddenUnicodeScan.lineNumbers,
        );
        if (hiddenUnicodeScan.inComments) {
          addFlag("cdx:file:hiddenUnicodeInComments");
          addList(
            "cdx:file:hiddenUnicodeCommentCodePoints",
            hiddenUnicodeScan.commentCodePoints,
          );
        }
      }
      if (packageRefs.length) {
        addList("cdx:agent:mcpPackageRefs", packageRefs);
        if (
          packageRefs.some((ref) => !ref.startsWith("@modelcontextprotocol/"))
        ) {
          addFlag("cdx:agent:hasNonOfficialMcpReference");
        }
      }
      if (mcpUrls.length) {
        addList(
          "cdx:agent:hiddenMcpUrls",
          mcpUrls.map((urlValue) => sanitizeBomUrl(urlValue)),
        );
        addList("cdx:agent:hiddenMcpHosts", [...new Set(mcpHosts)].sort());
      }
      if (providerNames.length) {
        addList("cdx:agent:providerNames", providerNames);
      }
      if (authHints.length) {
        addList("cdx:agent:authHints", authHints);
      }
      if (credentialIndicators.length) {
        addFlag("cdx:agent:credentialExposure");
        addList("cdx:agent:credentialRiskIndicators", credentialIndicators);
      }
      if (hasPublicMcpEndpoint) {
        addFlag("cdx:agent:hasPublicMcpEndpoint");
      }
      if (hasTunnelReference) {
        addFlag("cdx:agent:hasTunnelReference");
      }
      if (hiddenComponentKinds.length) {
        addList("cdx:agent:hiddenComponentKinds", hiddenComponentKinds);
      }
      if (
        hiddenUnicodeScan.hasHiddenUnicode ||
        hasPublicMcpEndpoint ||
        hasTunnelReference ||
        packageRefs.length > 0 ||
        credentialIndicators.length > 0
      ) {
        addFlag("cdx:agent:reviewNeeded");
      }

      components.push({
        "bom-ref": `file:${filePath}`,
        name: basename(filePath),
        properties,
        type: "file",
      });
      services.push(
        ...buildInferredMcpServices(
          filePath,
          mcpUrls,
          authHints,
          providerNames,
        ),
      );
    }
    return { components, services };
  },
};