// secure-scan-js
// Version:
// A JavaScript implementation of Yelp's detect-secrets tool - no Python required
// 754 lines (670 loc) • 20.9 kB
// text/typescript
import { spawn } from "child_process";
import { exec, execFile } from "child_process";
import * as fs from "fs";
import * as os from "os";
import * as path from "path";
import { dirname, relative } from "path";
import { promisify } from "util";
import { ScanResults, Secret } from "./types";
import { ScanOptions } from "./types";
/**
 * Shape of a single finding in the JSON report produced by Gitleaks.
 *
 * The report is run through `JSON.parse` and typed as an array of this
 * interface without any runtime validation, so a field may be absent at
 * runtime even though it is declared as required here.
 *
 * NOTE(review): the per-field notes describe how this file uses each field;
 * fields marked "not read" are presumably what their names suggest - confirm
 * against the documentation of the Gitleaks version in use.
 */
interface GitleaksResult {
// Not read anywhere in this file.
Description: string;
// Line of the finding; reported as the secret's `line` and used as the blame line.
StartLine: number;
// Not read anywhere in this file.
EndLine: number;
// Not read anywhere in this file.
StartColumn: number;
// Not read anywhere in this file.
EndColumn: number;
// Matched text; tested against false-positive patterns, and its length feeds the confidence score.
Match: string;
// Not read anywhere in this file.
Secret: string;
// Path of the file containing the finding; drives the exclusion checks and is reported as the secret's `file`.
File: string;
// Copied to the secret's `commit` by the git-history scan ("" when missing).
Commit: string;
// Compared against fixed thresholds for filtering, confidence and false-positive scoring.
Entropy: number;
// Copied to the secret's `author` by the git-history scan ("" when missing).
Author: string;
// Copied to the secret's `email` by the git-history scan ("" when missing).
Email: string;
// Copied to the secret's `date` by the git-history scan ("" when missing).
Date: string;
// Copied to the secret's `message` by the git-history scan ("" when missing).
Message: string;
// Not read anywhere in this file.
Tags: string[];
// Reported as the only entry of the secret's `types`; "aws"/"github" substrings raise the confidence score.
RuleID: string;
// Stored as the secret's `hashed_secret` by `runGitleaksScan` ("" when missing).
Fingerprint: string;
}
/**
 * Git blame details for one line of a file, as returned by `getGitBlameInfo`.
 *
 * Every field is a plain string. When a value cannot be determined the
 * producer fills in a placeholder such as "Unknown" or "N/A" rather than
 * leaving the field empty.
 */
interface GitBlameInfo {
// Author name taken from the `author` line of `git blame --porcelain`.
author: string;
// Author e-mail taken from the `author-mail` line, with the angle brackets removed.
email: string;
// ISO-8601 timestamp derived from the `author-time` line, or "Unknown".
date: string;
// Commit hash: the first token of the first line of the blame output.
commit: string;
// Commit message read with `git show -s --format=%B`, trimmed.
message: string;
}
/**
 * Write the enhanced Gitleaks configuration to a fresh temporary file.
 *
 * The file is created inside `os.tmpdir()` with a name that combines the
 * current time, the process id and a random suffix, so scans started within
 * the same millisecond (or from different processes) never share a path.
 * It is opened with the exclusive `wx` flag and owner-only permissions, which
 * means an already existing file or symlink at that path is never followed
 * or overwritten.
 *
 * The caller owns the returned file and is responsible for deleting it.
 *
 * @returns Path of the newly written `.toml` configuration file.
 * @throws If the file cannot be created or written.
 */
async function createGitleaksConfig(): Promise<string> {
  // The TOML below is written to disk verbatim: keep its lines unindented.
  const configContent = `# Enhanced Gitleaks Configuration
title = "Enhanced Gitleaks Configuration for secure-scan-js"
# Global allowlist
[allowlist]
description = "Global allowlist"
regexes = [
'''(?i)(?:example|sample|test|demo|placeholder|dummy|fake)''',
'''(?i)(?:localhost|127\\.0\\.0\\.1|0\\.0\\.0\\.0)''',
'''(?i)(?:password|secret|key|token)(?:\\s*[:=]\\s*)?(?:\\"|')?(?:your|my|the)?(?:\\"|')?(?:\\s+|$)''',
'''(?i)(?:TODO|FIXME|XXX)''',
]
paths = [
'''(?i).*?(?:test|spec|example|sample|demo).*?''',
'''(?i).*?(?:\\.md|\\.txt|\\.rst|\\.log)$''',
'''(?i).*?node_modules.*?''',
'''(?i).*?\\.git.*?''',
'''(?i).*?\\.next.*?''',
'''(?i).*?dist/.*?''',
'''(?i).*?build/.*?''',
]
# Enhanced entropy-based detection
[[rules]]
id = "enhanced-high-entropy-base64"
description = "High Entropy Base64 String"
regex = '''[A-Za-z0-9+/]{20,}={0,2}'''
entropy = 4.5
[[rules]]
id = "enhanced-high-entropy-hex"
description = "High Entropy Hexadecimal String"
regex = '''[a-fA-F0-9]{32,}'''
entropy = 4.0
# ... add more rules as needed ...
`;

  // A timestamp alone is predictable and collides for back-to-back calls.
  const uniqueSuffix = [
    Date.now(),
    process.pid,
    Math.random().toString(36).slice(2, 10),
  ].join("-");
  const configPath = path.join(
    os.tmpdir(),
    `gitleaks-config-${uniqueSuffix}.toml`
  );

  // "wx" fails instead of writing through an existing file or symlink;
  // 0o600 keeps the file private to the current user.
  fs.writeFileSync(configPath, configContent, { flag: "wx", mode: 0o600 });
  return configPath;
}
/**
 * Run `gitleaks` with the given arguments and collect everything it writes
 * to stdout.
 *
 * @param args Command line arguments passed to the `gitleaks` executable.
 * @returns The captured stdout when Gitleaks exits with code 0 (no findings)
 *   or 1 (findings present).
 * @throws If the executable is missing or exits with any other code.
 */
function collectGitleaksStdout(args: string[]): Promise<string> {
  return new Promise<string>((resolve, reject) => {
    const gitleaks = spawn("gitleaks", args);
    let output = "";
    let errorOutput = "";

    gitleaks.stdout.on("data", (data) => {
      output += data.toString();
    });
    gitleaks.stderr.on("data", (data) => {
      errorOutput += data.toString();
    });

    // A failed spawn emits "error" before "close"; the promise keeps the
    // first settlement, so the friendlier message below wins.
    gitleaks.on("error", (error) => {
      const errorCode = (error as Error & { code?: string }).code;
      if (errorCode === "ENOENT" || error.message.includes("ENOENT")) {
        reject(
          new Error(
            "Gitleaks is not installed. Please install Gitleaks first: https://github.com/zricethezav/gitleaks#installation"
          )
        );
      } else {
        reject(error);
      }
    });

    gitleaks.on("close", (code) => {
      // Gitleaks returns exit code 1 when it finds secrets, which is not an error
      if (code !== 0 && code !== 1) {
        reject(new Error(`Gitleaks failed with code ${code}: ${errorOutput}`));
        return;
      }
      resolve(output);
    });
  });
}

/**
 * Extract the array of findings from Gitleaks' stdout.
 *
 * Empty output means "no findings". Otherwise the whole text is parsed as
 * JSON first; if that fails, parsing is retried from each line that starts
 * with `[` so that a report preceded by other text is still understood.
 * NOTE(review): this fallback exists because `--verbose` appears to make
 * Gitleaks print human-readable findings to stdout as well - confirm against
 * the Gitleaks version in use.
 *
 * @param output Raw stdout captured from Gitleaks.
 * @returns The parsed findings (not validated field by field).
 * @throws If no JSON array can be extracted from the output.
 */
function parseGitleaksFindings(output: string): GitleaksResult[] {
  const text = output.trim();
  if (!text) {
    return [];
  }

  const asFindings = (value: unknown): GitleaksResult[] => {
    if (!Array.isArray(value)) {
      throw new Error("expected a JSON array of findings");
    }
    return value as GitleaksResult[];
  };

  try {
    return asFindings(JSON.parse(text));
  } catch (firstError) {
    const arrayStart = /^\[/gm;
    let candidate: RegExpExecArray | null;
    while ((candidate = arrayStart.exec(text)) !== null) {
      if (candidate.index === 0) {
        continue; // the full text was already tried above
      }
      try {
        return asFindings(JSON.parse(text.slice(candidate.index)));
      } catch {
        // Not the report yet; keep looking further down.
      }
    }
    throw firstError;
  }
}

/**
 * Scan a directory with Gitleaks using the enhanced temporary configuration
 * and convert the findings into this package's `ScanResults` shape.
 *
 * Steps:
 *  1. write a temporary config via `createGitleaksConfig`,
 *  2. run `gitleaks detect` with a redacted JSON report written to stdout,
 *  3. delete the temporary config as soon as the process has finished,
 *  4. drop findings rejected by `shouldSkipResult`, then enrich the rest with
 *     git blame data, a confidence score and a false-positive flag.
 *
 * @param directory Directory handed to Gitleaks as `--source`.
 * @param scanOptions Optional flags: `verbose` adds `--verbose`, and
 *   `--no-git` is passed unless `scanGitHistory` is set. The object is also
 *   forwarded to the filtering and blame helpers.
 * @returns The accepted findings in `secrets`; `missed_secrets` is always empty.
 * @throws If Gitleaks is not installed, exits with a code other than 0 or 1,
 *   or its output cannot be processed.
 */
export async function runGitleaksScan(
  directory: string,
  scanOptions?: ScanOptions
): Promise<ScanResults> {
  const configPath = await createGitleaksConfig();

  const args = ["detect", "--source", directory, "--config", configPath];
  args.push("--no-banner"); // Cleaner output
  if (scanOptions?.verbose) {
    args.push("--verbose");
  }
  // Only add --no-git if we're not scanning git history
  if (!scanOptions?.scanGitHistory) {
    args.push("--no-git");
  }
  args.push("--report-format", "json", "--report-path", "-");
  // Redact so that actual secrets never end up in the captured output
  args.push("--redact");

  // Single cleanup point: runs whether the process succeeded or failed.
  const output = await collectGitleaksStdout(args).finally(() => {
    try {
      fs.unlinkSync(configPath);
    } catch {
      // Ignore cleanup errors
    }
  });

  try {
    const secrets: Secret[] = [];
    for (const result of parseGitleaksFindings(output)) {
      if (shouldSkipResult(result, scanOptions)) {
        continue;
      }

      // Joining an absolute path onto the scan directory would produce a
      // path that does not exist, so absolute paths are used as they are.
      const blamePath = path.isAbsolute(result.File)
        ? result.File
        : path.join(directory, result.File);
      const blameInfo = await getGitBlameInfo(
        blamePath,
        result.StartLine,
        scanOptions
      );

      secrets.push({
        file: result.File,
        line: result.StartLine,
        types: [result.RuleID],
        is_false_positive: calculateFalsePositiveProbability(result) > 0.7,
        hashed_secret: result.Fingerprint || "",
        author: blameInfo.author,
        email: blameInfo.email,
        date: blameInfo.date,
        commit: blameInfo.commit,
        message: blameInfo.message,
        detectedBy: "gitleaks",
        entropy: result.Entropy,
        confidence: calculateConfidenceScore(result),
      });
    }
    return { secrets, missed_secrets: [] };
  } catch (error: unknown) {
    const errorMessage =
      error instanceof Error ? error.message : String(error);
    throw new Error(`Failed to parse Gitleaks output: ${errorMessage}`);
  }
}
/** Substrings in the matched text that mark a finding as an obvious placeholder. */
const SKIP_MATCH_PATTERNS: readonly RegExp[] = [
  /example/i,
  /sample/i,
  /test/i,
  /demo/i,
  /placeholder/i,
  /your_api_key_here/i,
  /enter_your_key/i,
];

/**
 * Decide whether a Gitleaks finding should be dropped before reporting.
 *
 * A finding is skipped when any of the following holds:
 *  - its file lives under a `node_modules` directory (unless
 *    `scanOptions.includeNodeModules` is set) or under a `.next` directory;
 *  - its file name ends with `package-lock.json`, `yarn.lock` or
 *    `pnpm-lock.yaml`;
 *  - its entropy is below 2.5;
 *  - its matched text contains a well-known placeholder word.
 *
 * Directory checks are done on path segments, so both `/` and `\` separators
 * and root-relative paths such as `node_modules/pkg/index.js` are recognised.
 *
 * @param result Finding reported by Gitleaks.
 * @param scanOptions Optional flags; `verbose` logs the reason for each skip.
 * @returns `true` when the finding must be ignored.
 */
function shouldSkipResult(
  result: GitleaksResult,
  scanOptions?: ScanOptions
): boolean {
  const verbose = Boolean(scanOptions?.verbose);

  // Everything but the last segment is a directory name. Matching whole
  // segments also catches paths that *start* with the directory, which a
  // "/node_modules/" substring search misses.
  const directories = result.File.split(/[\\/]+/).slice(0, -1);
  const inNodeModules =
    !scanOptions?.includeNodeModules && directories.includes("node_modules");
  const inNextBuild = directories.includes(".next");
  const isLockFile =
    result.File.endsWith("package-lock.json") ||
    result.File.endsWith("yarn.lock") ||
    result.File.endsWith("pnpm-lock.yaml");

  if (inNodeModules || inNextBuild || isLockFile) {
    if (verbose) {
      console.log(`Skipping excluded file: ${result.File}`);
    }
    return true;
  }

  // Skip very low entropy results
  if (result.Entropy < 2.5) {
    if (verbose) {
      console.log(
        `Skipping low entropy result: ${result.File}:${result.StartLine}`
      );
    }
    return true;
  }

  // Skip common false positives
  if (SKIP_MATCH_PATTERNS.some((pattern) => pattern.test(result.Match))) {
    if (verbose) {
      console.log(
        `Skipping false positive pattern: ${result.File}:${result.StartLine}`
      );
    }
    return true;
  }

  return false;
}
/**
 * Estimate how likely a finding is to be a real secret.
 *
 * Starts from a base of 0.5 and adds independent bonuses for entropy,
 * match length, file context and rule id; the sum is capped at 1.0.
 *
 * @param result Finding reported by Gitleaks.
 * @returns Confidence in the range 0.5 to 1.0.
 */
function calculateConfidenceScore(result: GitleaksResult): number {
  const { Entropy: entropy, Match: match, File: file, RuleID: ruleId } = result;

  // Higher entropy = higher confidence (only the best tier counts)
  const entropyBonus =
    entropy > 4.0 ? 0.3 : entropy > 3.5 ? 0.2 : entropy > 3.0 ? 0.1 : 0;

  // Longer matches = higher confidence
  const lengthBonus = match.length > 40 ? 0.2 : match.length > 20 ? 0.1 : 0;

  // Secrets are more plausible in .env files and config-like paths
  const fileBonus = file.endsWith(".env") || file.includes("config") ? 0.1 : 0;

  // Provider-specific rules are more trustworthy than generic ones
  const ruleBonus = ["aws", "github"].some((tag) => ruleId.includes(tag))
    ? 0.2
    : 0;

  // Accumulate left to right, starting from the base confidence
  const total = [entropyBonus, lengthBonus, fileBonus, ruleBonus].reduce(
    (sum, bonus) => sum + bonus,
    0.5
  );
  return Math.min(total, 1.0);
}
/**
 * Estimate how likely a finding is to be a false positive.
 *
 * Each indicator that applies contributes a fixed weight; the weights are
 * summed and the total is capped at 1.0.
 *
 * @param result Finding reported by Gitleaks.
 * @returns Probability in the range 0.0 to 1.0.
 */
function calculateFalsePositiveProbability(result: GitleaksResult): number {
  // [indicator applies?, weight] pairs, evaluated in this order
  const indicators: Array<[boolean, number]> = [
    // Placeholder wording inside the match itself
    [/(?:example|sample|test|demo|placeholder)/i.test(result.Match), 0.4],
    // Template phrases such as "your_key" or "my-secret"
    [/(?:your|my|the)[\s_-]?(?:key|secret|token)/i.test(result.Match), 0.3],
    // Low-entropy strings are rarely real credentials
    [result.Entropy < 3.0, 0.2],
    // Test/spec files tend to contain fixtures
    [result.File.includes("test") || result.File.includes("spec"), 0.2],
  ];

  let probability = 0.0;
  for (const [applies, weight] of indicators) {
    if (applies) {
      probability += weight;
    }
  }
  return Math.min(probability, 1.0);
}
/**
 * Clone a remote repository into a private temporary directory, scan it
 * (including its git history) for secrets, and remove the clone afterwards.
 *
 * The temporary directory is removed on every path: successful scan, failed
 * scan and failed clone. A failure to remove it is only logged as a warning.
 *
 * @param repoUrl URL (or local path) of the Git repository to scan.
 * @param scanOptions Optional scan options; forwarded to `runGitleaksScan`
 *   with `scanGitHistory` forced to `true`.
 * @returns Scan results
 * @throws If the clone fails, `git` cannot be started, or the scan fails.
 */
export async function scanRemoteRepository(
  repoUrl: string,
  scanOptions?: ScanOptions
): Promise<ScanResults> {
  // mkdtemp yields an unpredictable, owner-only directory; git accepts an
  // existing empty directory as the clone target.
  const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "secure-scan-js-"));

  try {
    // Clone the repository
    await new Promise<void>((resolve, reject) => {
      // "--" stops option parsing so a URL starting with "-" cannot be
      // interpreted as a git option. stdio is ignored because the output is
      // never read, and an unread pipe could stall a chatty clone.
      const git = spawn("git", ["clone", "--", repoUrl, tmpDir], {
        stdio: "ignore",
      });
      git.on("close", (code) => {
        if (code !== 0) {
          reject(new Error(`Failed to clone repository: ${repoUrl}`));
          return;
        }
        resolve();
      });
      git.on("error", (error) => {
        reject(error);
      });
    });

    // Scan the cloned repository, always including its git history
    return await runGitleaksScan(tmpDir, {
      ...scanOptions,
      scanGitHistory: true,
    });
  } finally {
    // Clean up temporary directory using cross-platform Node.js fs.rmSync
    try {
      if (fs.existsSync(tmpDir)) {
        fs.rmSync(tmpDir, { recursive: true, force: true });
      }
    } catch (error) {
      console.warn(
        `Failed to clean up temporary directory: ${
          error instanceof Error ? error.message : String(error)
        }`
      );
    }
  }
}
/**
 * Scan Git history (commits) for secrets in a local repository.
 *
 * The commits to scan are selected through Gitleaks' `--log-opts`:
 *  - no bounds: `--all` (every ref);
 *  - `fromCommit` only: `fromCommit..HEAD`;
 *  - both bounds: `fromCommit..toCommit`;
 *  - `toCommit` only: `toCommit`, i.e. that commit and its ancestors.
 *
 * `--all` is deliberately not combined with a range: it would add every ref
 * as a starting point and thereby defeat the upper bound, and a bare
 * `..toCommit` means `HEAD..toCommit` rather than "up to toCommit".
 *
 * @param directory The directory containing the Git repository
 * @param fromCommit Optional starting commit (exclusive lower bound)
 * @param toCommit Optional ending commit
 * @param scanOptions Optional scan options forwarded to the Gitleaks runner
 * @returns Scan results
 * @throws If a commit reference starts with "-" or contains whitespace, since
 *   it would otherwise be interpreted as additional `git log` options.
 */
export async function scanGitHistory(
  directory: string,
  fromCommit?: string,
  toCommit?: string,
  scanOptions?: ScanOptions
): Promise<ScanResults> {
  // The references are spliced into a single option string, so anything
  // that looks like an option or contains a separator must be refused.
  for (const ref of [fromCommit, toCommit]) {
    if (ref && (ref.startsWith("-") || /\s/.test(ref))) {
      throw new Error(`Invalid commit reference: ${ref}`);
    }
  }

  let logOpts: string;
  if (fromCommit) {
    // Scan a range of commits, defaulting the upper bound to HEAD
    logOpts = `${fromCommit}..${toCommit || "HEAD"}`;
  } else if (toCommit) {
    // Scan everything reachable from that commit
    logOpts = toCommit;
  } else {
    // Scan all history
    logOpts = "--all";
  }

  const args = [
    "detect",
    "--source",
    directory,
    "--log-opts",
    logOpts,
    // Report as JSON on stdout so the runner can parse it
    "--report-format",
    "json",
    "--report-path",
    "-",
  ];
  return runGitleaksWithArgs(args, scanOptions);
}
/**
 * Run Gitleaks with the given arguments and convert its JSON report (read
 * from stdout) into `ScanResults`.
 *
 * Findings under a `node_modules` directory (unless
 * `scanOptions.includeNodeModules` is set), under a `.next` directory, or in
 * dependency lock files are dropped. Directory checks work on path segments,
 * so repository-relative paths such as `node_modules/pkg/index.js` and both
 * `/` and `\` separators are recognised.
 *
 * Author, e-mail, date, commit and message are taken from the report itself;
 * no git blame lookup is performed here.
 *
 * @param args Command line arguments for Gitleaks
 * @param scanOptions Optional flags (`includeNodeModules`, `verbose`)
 * @returns Scan results; `missed_secrets` is always empty
 */
function runGitleaksWithArgs(
  args: string[],
  scanOptions?: ScanOptions
): Promise<ScanResults> {
  const isLockFile = (file: string): boolean =>
    file.endsWith("package-lock.json") ||
    file.endsWith("yarn.lock") ||
    file.endsWith("pnpm-lock.yaml");

  const isExcluded = (file: string): boolean => {
    // Every segment except the last is a directory name; matching whole
    // segments also covers paths that begin with the directory.
    const directories = file.split(/[\\/]+/).slice(0, -1);
    return (
      (!scanOptions?.includeNodeModules &&
        directories.includes("node_modules")) ||
      directories.includes(".next") ||
      isLockFile(file)
    );
  };

  const logSkip = (file: string): void => {
    if (file.includes("node_modules")) {
      console.log(`Skipping node_modules result: ${file}`);
    } else if (file.includes(".next")) {
      console.log(`Skipping .next build directory result: ${file}`);
    } else if (isLockFile(file)) {
      console.log(`Skipping dependency lock file: ${file}`);
    }
  };

  return new Promise((resolve, reject) => {
    const gitleaks = spawn("gitleaks", args);
    let output = "";
    let errorOutput = "";

    gitleaks.stdout.on("data", (data) => {
      output += data.toString();
    });
    gitleaks.stderr.on("data", (data) => {
      errorOutput += data.toString();
    });

    gitleaks.on("close", (code) => {
      // Exit code 0 means no secrets found and 1 means secrets were found;
      // any other code is a real error.
      if (code !== 0 && code !== 1) {
        reject(new Error(`Gitleaks failed with code ${code}: ${errorOutput}`));
        return;
      }

      try {
        const report = output.trim();
        // If there's no output, return an empty result
        if (!report) {
          resolve({ secrets: [], missed_secrets: [] });
          return;
        }

        const parsed: unknown = JSON.parse(report);
        if (!Array.isArray(parsed)) {
          throw new Error("expected a JSON array of findings");
        }

        const secrets: Secret[] = [];
        for (const result of parsed as GitleaksResult[]) {
          if (isExcluded(result.File)) {
            if (scanOptions?.verbose) {
              logSkip(result.File);
            }
            continue;
          }

          // Convert Gitleaks result to our Secret format
          secrets.push({
            file: result.File,
            line: result.StartLine,
            types: [result.RuleID],
            is_false_positive: false,
            hashed_secret: "",
            author: result.Author || "",
            email: result.Email || "",
            date: result.Date || "",
            commit: result.Commit || "",
            message: result.Message || "",
            detectedBy: "gitleaks",
          });
        }

        resolve({ secrets, missed_secrets: [] });
      } catch (error) {
        reject(
          new Error(
            `Failed to parse Gitleaks output: ${
              error instanceof Error ? error.message : String(error)
            }`
          )
        );
      }
    });

    // A failed spawn emits "error" before "close"; the first rejection wins.
    gitleaks.on("error", (error) => {
      const errorCode = (error as Error & { code?: string }).code;
      if (errorCode === "ENOENT" || error.message.includes("ENOENT")) {
        reject(
          new Error(
            "Gitleaks is not installed. Please install Gitleaks first: https://github.com/zricethezav/gitleaks#installation"
          )
        );
      } else {
        reject(error);
      }
    });
  });
}
/**
 * Get git blame information for a specific file and line.
 *
 * Files under a `node_modules` or `.next` directory and dependency lock
 * files are not blamed; a fixed descriptive record is returned for them.
 * Directory checks work on path segments, so root-relative paths such as
 * `node_modules/pkg/index.js` and both `/` and `\` separators are recognised.
 *
 * For every other file the repository root is taken from
 * `scanOptions.gitRepoPath` or discovered with `git rev-parse
 * --show-toplevel` (falling back to the current working directory), then
 * `git blame --porcelain` and `git show` are consulted. All git commands are
 * started without a shell, so file names containing quotes, `$(...)` or
 * backticks are passed through literally and never executed.
 *
 * This function never rejects: any failure yields a record whose fields are
 * all "Unknown".
 *
 * @param filePath Path to the file
 * @param lineNumber 1-based line number to blame
 * @param scanOptions Optional scan options (`gitRepoPath`)
 * @returns Object with author, email, date, commit hash and commit message
 */
export async function getGitBlameInfo(
  filePath: string,
  lineNumber: number,
  scanOptions?: ScanOptions
): Promise<GitBlameInfo> {
  try {
    // Every segment except the last is a directory name.
    const directories = filePath.split(/[\\/]+/).slice(0, -1);

    // Skip git blame for node_modules files
    if (directories.includes("node_modules")) {
      return {
        author: "NodeModule",
        email: "npm-package",
        date: "Unknown",
        commit: "N/A",
        message: "Third-party module dependency",
      };
    }

    // Skip git blame for Next.js build files
    if (directories.includes(".next")) {
      return {
        author: "NextJS",
        email: "build-output",
        date: "Unknown",
        commit: "N/A",
        message: "Next.js build output",
      };
    }

    // Skip git blame for lock files
    if (
      filePath.endsWith("package-lock.json") ||
      filePath.endsWith("yarn.lock") ||
      filePath.endsWith("pnpm-lock.yaml")
    ) {
      return {
        author: "PackageManager",
        email: "auto-generated",
        date: "Unknown",
        commit: "N/A",
        message: "Auto-generated dependency lock file",
      };
    }

    // `git blame -L` needs a positive integer; bail out before spawning git.
    if (!Number.isInteger(lineNumber) || lineNumber < 1) {
      throw new Error(`Invalid line number: ${lineNumber}`);
    }

    // execFile passes arguments as an array and does not involve a shell.
    const runGit = promisify(execFile);

    // Determine which git repository path to use
    let repoPath = "";
    if (scanOptions?.gitRepoPath) {
      repoPath = scanOptions.gitRepoPath;
    } else {
      try {
        const { stdout: gitRoot } = await runGit(
          "git",
          ["rev-parse", "--show-toplevel"],
          { cwd: dirname(filePath) }
        );
        repoPath = gitRoot.trim();
      } catch {
        // If we can't determine the repository root, use the current directory
        repoPath = process.cwd();
      }
    }

    // Only calculate a relative path if repoPath differs from the current directory
    const relativeFilePath =
      repoPath && repoPath !== process.cwd()
        ? relative(repoPath, filePath)
        : filePath;

    // "--" marks the end of options so the path is never read as one.
    const { stdout: blameOutput } = await runGit(
      "git",
      [
        "blame",
        "-L",
        `${lineNumber},${lineNumber}`,
        "--porcelain",
        "--",
        relativeFilePath,
      ],
      { cwd: repoPath }
    );

    // Porcelain output: "<hash> ..." on the first line, then "key value" lines.
    const blameLines = blameOutput.split("\n");
    const commitHash = blameLines[0].split(" ")[0];
    const headerValue = (prefix: string): string | undefined => {
      const line = blameLines.find((entry) => entry.startsWith(prefix));
      return line === undefined ? undefined : line.slice(prefix.length);
    };

    const author = headerValue("author ") ?? "Unknown";
    const rawEmail = headerValue("author-mail ");
    const email =
      rawEmail === undefined ? "Unknown" : rawEmail.replace(/[<>]/g, "");
    const rawTime = headerValue("author-time ");
    // author-time is in seconds; Date expects milliseconds.
    const timestamp = rawTime === undefined ? 0 : parseInt(rawTime, 10) * 1000;
    const date = timestamp ? new Date(timestamp).toISOString() : "Unknown";

    // Get the commit message
    const { stdout: messageOutput } = await runGit(
      "git",
      ["show", "-s", "--format=%B", commitHash],
      { cwd: repoPath }
    );

    return {
      author,
      email,
      date,
      commit: commitHash,
      message: messageOutput.trim(),
    };
  } catch (error) {
    // Return default values if git blame fails
    return {
      author: "Unknown",
      email: "Unknown",
      date: "Unknown",
      commit: "Unknown",
      message: "Unknown",
    };
  }
}
/**
 * Attach git blame details to each secret.
 *
 * Secrets are processed one after another, in input order. Each output
 * entry is a copy of the input secret with `author`, `email`, `date`,
 * `commit` and `message` overwritten by the blame lookup; if the lookup
 * throws, the original secret object is passed through untouched.
 *
 * @param secrets Array of secrets to enrich
 * @param scanOptions Optional scan options forwarded to the blame lookup
 * @returns A new array of secrets carrying author information
 */
export async function enrichSecretsWithBlameInfo(
  secrets: Secret[],
  scanOptions?: ScanOptions
): Promise<Secret[]> {
  const result: Secret[] = [];

  for (let index = 0; index < secrets.length; index++) {
    const current = secrets[index];
    // Falls back to the untouched secret when blame is unavailable
    let entry: Secret = current;

    try {
      const { author, email, date, commit, message } = await getGitBlameInfo(
        current.file,
        current.line,
        scanOptions
      );
      entry = { ...current, author, email, date, commit, message };
    } catch (error) {
      // If blame fails, keep the original secret
    }

    result.push(entry);
  }

  return result;
}