/*
 * secure-scan-js
 * Version:
 * A JavaScript implementation of Yelp's detect-secrets tool - no Python required
 * 760 lines (672 loc) • 21.8 kB
 * text/typescript
 */
import { loadPyodide } from "pyodide";
import * as fs from "fs";
import * as path from "path";
import { ScanOptions, ScanResults, Secret } from "./types";
import {
runGitleaksScan,
scanRemoteRepository,
scanGitHistory,
enrichSecretsWithBlameInfo,
} from "./gitleaks";
// Declare Node.js globals
// Minimal ambient declarations for the Node.js globals this file relies on.
// `__dirname` is used to locate the Python sources under the "python" folder;
// `process.cwd()` supplies the default directory for directory scans.
declare const __dirname: string;
declare const process: {
cwd(): string;
platform: string;
};
// Global Pyodide instance
// Assigned by initialize() from loadPyodide(); stays null until then.
let pyodideInstance: any = null;
// Set to true only after initialize() has run to completion without error.
let isInitialized = false;
// True while an initialize() call is in progress; reset when it finishes,
// whether it succeeded or failed.
let isInitializing = false;
// Constants
// Fallback used whenever options.maxFileSize is missing or falsy.
const DEFAULT_MAX_FILE_SIZE = 0; // No default file size limit (0 means no limit)
// File extensions that are treated as binary without looking at the content.
const BINARY_FILE_EXTENSIONS = [
  // Archives and packed data
  ".pack",
  ".gz",
  ".zip",
  ".jar",
  ".war",
  ".ear",
  // Compiled code and libraries
  ".class",
  ".so",
  ".dll",
  ".exe",
  ".obj",
  ".o",
  ".a",
  ".lib",
  ".pyc",
  ".pyo",
  // Images
  ".jpg",
  ".jpeg",
  ".png",
  ".gif",
  ".bmp",
  ".ico",
  ".tif",
  ".tiff",
  // Audio and video
  ".mp3",
  ".mp4",
  ".avi",
  ".mov",
  ".wmv",
  ".flv",
  // Office documents
  ".pdf",
  ".doc",
  ".docx",
  ".xls",
  ".xlsx",
  ".ppt",
  ".pptx",
];

/**
 * Heuristically decide whether a file should be skipped as binary.
 *
 * The checks run in this order:
 *  1. the (lower-cased) extension is listed in BINARY_FILE_EXTENSIONS;
 *  2. the path lies inside `.git`, `__pycache__`, `.next`, or (unless
 *     `options.includeNodeModules` is set) `node_modules`;
 *  3. the first bytes of the file (at most 4096) contain a NUL byte.
 *
 * @param filePath Path of the file to inspect.
 * @param fileSize Size of the file in bytes; caps how many bytes are sniffed.
 * @param options Scan options; only `includeNodeModules` is read here.
 * @returns `true` when the file looks binary, `false` otherwise. A file that
 *          cannot be opened or read is reported as not binary.
 */
function isLikelyBinaryFile(
  filePath: string,
  fileSize: number,
  options?: Partial<ScanOptions>
): boolean {
  const extension = path.extname(filePath).toLowerCase();
  if (BINARY_FILE_EXTENSIONS.includes(extension)) {
    return true;
  }

  // Normalize separators so every directory check behaves the same for
  // Windows-style and POSIX-style paths.
  const normalizedPath = filePath.replace(/\\/g, "/");
  const inSkippedDirectory =
    normalizedPath.includes("/.git/") ||
    (!options?.includeNodeModules &&
      normalizedPath.includes("/node_modules/")) ||
    normalizedPath.includes("/__pycache__/") ||
    normalizedPath.includes("/.next/");
  if (inSkippedDirectory) {
    return true;
  }

  // Sniff the beginning of the file for NUL bytes, which often indicate
  // binary data. No UTF-8 decoding check is attempted: Buffer#toString never
  // throws on invalid input, so such a check could not detect anything.
  let fd: number | undefined;
  try {
    fd = fs.openSync(filePath, "r");
    const buffer = Buffer.alloc(Math.min(4096, fileSize));
    const bytesRead = fs.readSync(fd, buffer, 0, buffer.length, 0);
    // Only inspect what was actually read; the rest of the buffer is the
    // zero fill from Buffer.alloc and would look like binary content.
    return buffer.subarray(0, bytesRead).includes(0);
  } catch {
    // If we can't read the file, assume it's not binary
    return false;
  } finally {
    if (fd !== undefined) {
      try {
        fs.closeSync(fd);
      } catch {
        // Nothing useful can be done if closing fails; keep the verdict.
      }
    }
  }
}
/**
 * Python helper modules shipped in the `python` folder next to this file,
 * listed in the order they are registered. `name` is both the file stem and
 * the Python module name; `label` is the wording used in progress logs.
 */
const PYTHON_HELPER_MODULES: ReadonlyArray<{ name: string; label: string }> = [
  { name: "secret_patterns", label: "secret patterns" },
  { name: "advanced_analyzer", label: "advanced analyzer" },
  { name: "enhanced_detector", label: "enhanced detector" },
  { name: "multi_language_detector", label: "multi-language detector" },
  { name: "heuristic_detector", label: "heuristic detector" },
  { name: "yelp_scanner", label: "yelp scanner" },
];

// The in-flight initialization, shared by concurrent callers; null when idle.
let initializationPromise: Promise<void> | null = null;

/**
 * Register one bundled Python source file as an importable module.
 *
 * The source is read from `python/<moduleName>.py`, exposed to Python as the
 * global `<moduleName>_code`, and executed inside a fresh module object that
 * is stored both in `sys.modules` and as the Python global `<moduleName>`.
 */
async function registerPythonModule(moduleName: string): Promise<void> {
  const source = fs.readFileSync(
    path.join(__dirname, "python", `${moduleName}.py`),
    "utf-8"
  );
  pyodideInstance.globals.set(`${moduleName}_code`, source);
  await pyodideInstance.runPythonAsync(`
import sys
from types import ModuleType
# Create the ${moduleName} module
${moduleName} = ModuleType('${moduleName}')
sys.modules['${moduleName}'] = ${moduleName}
# Execute the ${moduleName} code in the module's namespace
exec(${moduleName}_code, ${moduleName}.__dict__)
`);
}

/**
 * Perform the actual setup: load Pyodide and its packages, register the
 * helper modules, run the wrapper script and call its `initialize()`.
 */
async function runInitialization(): Promise<void> {
  try {
    console.log("Loading Pyodide...");
    pyodideInstance = await loadPyodide();

    console.log("Loading micropip...");
    await pyodideInstance.loadPackage(["micropip", "packaging"]);

    for (const { name, label } of PYTHON_HELPER_MODULES) {
      console.log(`Loading ${label} module...`);
      await registerPythonModule(name);
    }

    // The wrapper is executed directly in the Python global namespace.
    console.log("Setting up Python environment...");
    const wrapperSource = fs.readFileSync(
      path.join(__dirname, "python", "detect_secrets_wrapper.py"),
      "utf-8"
    );
    await pyodideInstance.runPythonAsync(wrapperSource);

    console.log("Installing Python dependencies (this may take a moment)...");
    await pyodideInstance.runPythonAsync("await initialize()");

    isInitialized = true;
    console.log("Initialization complete");
  } catch (error: unknown) {
    console.error("Failed to initialize:", error);
    const errorMessage = error instanceof Error ? error.message : String(error);
    throw new Error(`Failed to initialize WebAssembly module: ${errorMessage}`);
  }
}

/**
 * Initialize the WebAssembly module and Python environment
 *
 * Safe to call repeatedly: once initialization has succeeded the call
 * returns immediately, and callers that arrive while an initialization is
 * running share that attempt, including its failure. After a failed attempt
 * the next call starts over.
 *
 * @throws Error with the message prefix "Failed to initialize WebAssembly
 *         module:" when any setup step fails.
 */
export async function initialize(): Promise<void> {
  if (isInitialized) return;
  if (initializationPromise) {
    // Join the attempt already in progress so its outcome is not lost.
    return initializationPromise;
  }

  isInitializing = true;
  const attempt = (async () => {
    try {
      await runInitialization();
    } finally {
      isInitializing = false;
      initializationPromise = null;
    }
  })();
  initializationPromise = attempt;
  return attempt;
}
/**
 * Scan a file or string content for secrets
 *
 * Initializes the Python environment on first use, hands the content to the
 * Python `scan_file` function and parses the JSON string it produces.
 *
 * @param content The file content to scan
 * @param filePath The path of the file (for reporting)
 * @param options Scan options. `maxFileSize` (compared against the string
 *        length; 0 or unset means no limit) caps how much content is scanned,
 *        and `checkMissed` is forwarded to Python as a boolean.
 * @returns Scan results; `truncated` is set to true when the content was cut
 *          down to `maxFileSize`.
 * @throws Error prefixed with "Failed to scan content:" when the Python call
 *         fails, returns nothing, or reports an error. Initialization
 *         failures propagate unchanged.
 */
export async function scanContent(
  content: string,
  filePath: string,
  options: Partial<ScanOptions> = {}
): Promise<ScanResults> {
  if (!isInitialized) {
    await initialize();
  }
  try {
    // Get the max file size from options
    const maxFileSize = options.maxFileSize || DEFAULT_MAX_FILE_SIZE;

    // If max file size is set and content is too large, truncate it to prevent memory issues
    let truncated = false;
    if (maxFileSize > 0 && content.length > maxFileSize) {
      content = content.substring(0, maxFileSize);
      truncated = true;
      console.warn(
        `File ${filePath} is too large, scanning only the first ${maxFileSize} bytes`
      );
    }

    // Convert options to a Python-compatible format
    const checkMissed = options.checkMissed ? true : false;

    // Set up Python variables
    pyodideInstance.globals.set("js_file_content", content);
    pyodideInstance.globals.set("js_file_path", filePath);
    pyodideInstance.globals.set("js_check_missed", checkMissed);

    // Call the Python scan_file function. The snippet must start at column 0
    // and keep its block indentation, otherwise Python rejects it.
    await pyodideInstance.runPythonAsync(`
import json
try:
    result_json = scan_file(js_file_content, js_file_path, js_check_missed)
    js_result = result_json
except Exception as e:
    import traceback
    error_msg = traceback.format_exc()
    print(f"Python error: {str(e)}\\n{error_msg}")
    js_result = json.dumps({"error": str(e), "secrets": [], "missed_secrets": []})
`);

    // Get the result from Python
    const resultJson = pyodideInstance.globals.get("js_result");

    // Check if we got a valid result
    if (!resultJson) {
      throw new Error("No result returned from Python scanner");
    }

    // Parse the results
    const results = JSON.parse(resultJson);

    // Check if there was an error
    if (results.error) {
      throw new Error(`Python error: ${results.error}`);
    }

    // Add a note if the file was truncated
    if (truncated) {
      results.truncated = true;
    }
    return results;
  } catch (error: unknown) {
    console.error("Error scanning content:", error);
    const errorMessage = error instanceof Error ? error.message : String(error);
    throw new Error(`Failed to scan content: ${errorMessage}`);
  }
}
/**
 * Scan a file for secrets
 *
 * The following are skipped and yield an empty result:
 *  - paths inside `node_modules` (unless `options.includeNodeModules`);
 *  - paths inside `.next`;
 *  - directories;
 *  - files that isLikelyBinaryFile() classifies as binary;
 *  - files larger than `options.maxFileSize` when that limit is positive and
 *    `options.limitFileSize` is set.
 *
 * This function does not reject: any failure while inspecting, reading or
 * scanning the file is logged as a warning and reported as an empty result.
 *
 * @param filePath The path of the file to scan
 * @param options Scan options
 * @returns Scan results
 */
export async function scanFile(
  filePath: string,
  options: Partial<ScanOptions> = {}
): Promise<ScanResults> {
  try {
    // Skip node_modules files unless explicitly included
    if (
      (filePath.includes("/node_modules/") ||
        filePath.includes("\\node_modules\\")) &&
      !options.includeNodeModules
    ) {
      if (options.verbose) {
        console.log(`Skipping node_modules file: ${filePath}`);
      }
      return { secrets: [], missed_secrets: [] };
    }

    // Skip .next build files
    if (filePath.includes("/.next/") || filePath.includes("\\.next\\")) {
      if (options.verbose) {
        console.log(`Skipping Next.js build file: ${filePath}`);
      }
      return { secrets: [], missed_secrets: [] };
    }

    // Get file stats
    const stats = fs.statSync(filePath);

    // Skip directories
    if (stats.isDirectory()) {
      return { secrets: [], missed_secrets: [] };
    }

    // Get the max file size from options
    const maxFileSize = options.maxFileSize || DEFAULT_MAX_FILE_SIZE;

    // Check if it's a binary file
    if (isLikelyBinaryFile(filePath, stats.size, options)) {
      console.log(`Skipping likely binary file: ${filePath}`);
      return { secrets: [], missed_secrets: [] };
    }

    // Skip large files if a limit is set and limitFileSize option is true
    if (maxFileSize > 0 && stats.size > maxFileSize && options.limitFileSize) {
      console.log(
        `Skipping large file (${Math.round(stats.size / 1024)}KB): ${filePath}`
      );
      return { secrets: [], missed_secrets: [] };
    }

    // Read and scan the file. The await is required: returning the promise
    // directly would let a scan failure escape the catch block below.
    const content = fs.readFileSync(filePath, "utf-8");
    return await scanContent(content, filePath, options);
  } catch (error: unknown) {
    console.warn(
      `Skipping file ${filePath}: ${
        error instanceof Error ? error.message : String(error)
      }`
    );
    return { secrets: [], missed_secrets: [] };
  }
}
/**
 * Convert a simple glob into an anchored regular expression.
 * Only `*` is special (matches any run of characters); every other character
 * is matched literally.
 */
function globToRegExp(glob: string): RegExp {
  const body = glob
    .split("*")
    .map((literal) => literal.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"))
    .join(".*");
  return new RegExp(`^${body}$`);
}

/**
 * Scan a directory for secrets
 *
 * Walks the directory tree, skipping directories and files whose names match
 * the built-in exclusion lists or the globs given in `options.excludeDirs` /
 * `options.excludeFiles`, and scans every remaining file with scanFile().
 * Unless `options.enrichWithGitInfo` is set to a falsy value other than
 * `undefined`, detected secrets are then enriched with git blame
 * information; a failure of that step is logged and otherwise ignored.
 * Every returned secret is tagged with `detectedBy: "detect-secrets"`.
 *
 * @param directory The directory to scan
 * @param options Scan options
 * @returns Scan results; `truncated` is true when any scanned file was
 *          truncated.
 */
export async function scanDirectory(
  directory: string = process.cwd(),
  options: Partial<ScanOptions> = {}
): Promise<ScanResults> {
  if (!isInitialized) {
    await initialize();
  }
  const results: ScanResults = {
    secrets: [],
    missed_secrets: [],
  };

  // Directories that are always excluded, in addition to options.excludeDirs
  const defaultExcludeDirs = [
    "node_modules",
    ".git",
    "dist",
    "build",
    "coverage",
    ".next",
    "__pycache__",
    ".venv",
    "venv",
    "env",
    ".env",
  ];

  // File patterns that are always excluded, in addition to options.excludeFiles
  const defaultExcludeFiles = [
    "*.min.js",
    "*.min.css",
    "*.map",
    "*.lock",
    "package-lock.json",
    "yarn.lock",
    "pnpm-lock.yaml",
    "*.svg",
    "*.woff",
    "*.ttf",
    "*.eot",
    "*.jpg",
    "*.jpeg",
    "*.png",
    "*.gif",
    "*.ico",
    "*.pdf",
    "*.zip",
    "*.tar.gz",
  ];

  // Compile the exclusion globs once instead of once per directory entry.
  const excludedDirMatchers = [
    ...defaultExcludeDirs,
    ...(options.excludeDirs || []),
  ].map(globToRegExp);
  const excludedFileMatchers = [
    ...defaultExcludeFiles,
    ...(options.excludeFiles || []),
  ].map(globToRegExp);
  const matchesAny = (name: string, matchers: RegExp[]): boolean =>
    matchers.some((matcher) => matcher.test(name));

  // Recursively collect the files that survive the exclusion rules.
  // A directory that cannot be read is reported and contributes no files.
  const collectFiles = (dir: string, found: string[]): void => {
    try {
      const entries = fs.readdirSync(dir, { withFileTypes: true });
      for (const entry of entries) {
        const fullPath = path.join(dir, entry.name);
        if (entry.isDirectory()) {
          if (!matchesAny(entry.name, excludedDirMatchers)) {
            collectFiles(fullPath, found);
          }
        } else if (!matchesAny(entry.name, excludedFileMatchers)) {
          found.push(fullPath);
        }
      }
    } catch (err) {
      console.warn(
        `Error reading directory ${dir}: ${
          err instanceof Error ? err.message : String(err)
        }`
      );
    }
  };

  const files: string[] = [];
  collectFiles(directory, files);

  // Track if any files were truncated
  let anyTruncated = false;

  // Scan each file with error handling for individual files
  for (const file of files) {
    try {
      const fileResults = await scanFile(file, options);
      if (fileResults.truncated) {
        anyTruncated = true;
      }
      results.secrets = results.secrets.concat(fileResults.secrets);
      results.missed_secrets = results.missed_secrets.concat(
        fileResults.missed_secrets
      );
    } catch (error: unknown) {
      console.warn(
        `Skipping file ${file}: ${
          error instanceof Error ? error.message : String(error)
        }`
      );
    }
  }

  // Set the truncated flag if any files were truncated
  if (anyTruncated) {
    results.truncated = true;
  }

  // If we have secrets and the enrichWithGitInfo option is true (or was not
  // specified), add blame info
  if (
    results.secrets.length > 0 &&
    (options.enrichWithGitInfo || options.enrichWithGitInfo === undefined)
  ) {
    try {
      console.log("Enriching secrets with git blame information...");
      results.secrets = await enrichSecretsWithBlameInfo(
        results.secrets,
        options
      );
      console.log("Git blame enrichment complete");
    } catch (error) {
      console.warn(`Failed to enrich secrets with git blame info: ${error}`);
    }
  }

  // Add detectedBy field to all secrets
  results.secrets = results.secrets.map((secret) => ({
    ...secret,
    detectedBy: "detect-secrets",
  }));

  // The results are deliberately not logged here: they contain the detected
  // secrets and must not be written to stdout.
  return results;
}
/**
 * Scan a directory using both detect-secrets and Gitleaks
 *
 * Both scanners run in parallel. A scanner that fails is logged and treated
 * as having found nothing, so this function still resolves with whatever the
 * other scanner produced. Findings are de-duplicated by normalized file path
 * and line number: findings that share a key are merged into one entry whose
 * `types` is the sorted union of all reported types. `detectedBy` is
 * "detect-secrets" or "gitleaks" when only that scanner reported the
 * location, and "both" when each scanner reported it.
 *
 * @param directory The directory to scan
 * @param options Scan options
 * @returns Combined scan results, sorted by file and then line number
 */
export async function scanWithBothScanners(
  directory: string = process.cwd(),
  options: Partial<ScanOptions> = {}
): Promise<ScanResults> {
  console.log(
    "Starting both detect-secrets and Gitleaks scanners in parallel..."
  );

  // Run both scanners in parallel
  const [detectSecretsResults, gitleaksResults] = await Promise.all([
    // First promise: run detect-secrets
    (async (): Promise<ScanResults> => {
      try {
        console.log("Running detect-secrets scanner...");
        const results = await scanDirectory(directory, options);
        console.log(`detect-secrets found ${results.secrets.length} secrets`);
        return results;
      } catch (error) {
        console.error(
          `detect-secrets scan failed: ${
            error instanceof Error ? error.message : String(error)
          }`
        );
        return { secrets: [], missed_secrets: [] };
      }
    })(),
    // Second promise: run Gitleaks
    (async (): Promise<ScanResults> => {
      try {
        console.log("Running Gitleaks scanner...");
        const results = await runGitleaksScan(directory, options);
        console.log(`Gitleaks found ${results.secrets.length} secrets`);
        return results;
      } catch (error) {
        if (
          error instanceof Error &&
          error.message.includes("Gitleaks is not installed")
        ) {
          console.error(
            `Gitleaks is not installed. Please install it following the instructions at https://github.com/zricethezav/gitleaks#installation`
          );
        } else {
          console.error(
            `Gitleaks scan failed: ${
              error instanceof Error ? error.message : String(error)
            }`
          );
        }
        return { secrets: [], missed_secrets: [] };
      }
    })(),
  ]);

  // Unique secrets keyed by "<normalized file>:<line>"
  const uniqueSecrets = new Map<string, Secret>();

  // Helper function to normalize file paths to use forward slashes
  const normalizePath = (filePath: string): string => {
    return filePath.replace(/\\/g, "/");
  };

  // Helper function to create a unique key for a secret
  const createSecretKey = (secret: Secret): string => {
    return `${normalizePath(secret.file)}:${secret.line}`;
  };

  // Helper function to merge secret types while preserving all unique types
  const mergeSecretTypes = (types1: string[], types2: string[]): string[] => {
    return [...new Set([...types1, ...types2])].sort();
  };

  // Record one finding from the given scanner, merging it into an existing
  // entry for the same location when there is one.
  const addSecret = (
    secret: Secret,
    source: "detect-secrets" | "gitleaks"
  ): void => {
    const key = createSecretKey(secret);
    const existing = uniqueSecrets.get(key);
    if (!existing) {
      uniqueSecrets.set(key, {
        ...secret,
        file: normalizePath(secret.file), // Normalize the path
        detectedBy: source,
      });
      return;
    }
    uniqueSecrets.set(key, {
      ...existing,
      types: mergeSecretTypes(existing.types, secret.types),
      // Only claim "both" when the earlier finding came from the other
      // scanner (or the entry is already marked "both").
      detectedBy: existing.detectedBy === source ? source : "both",
      // Prefer metadata from the newer finding, falling back to what we have
      author: secret.author || existing.author,
      email: secret.email || existing.email,
      date: secret.date || existing.date,
      commit: secret.commit || existing.commit,
      message: secret.message || existing.message,
      hashed_secret: secret.hashed_secret || existing.hashed_secret,
    });
  };

  // Add detect-secrets results first, then Gitleaks results
  detectSecretsResults.secrets.forEach((secret) =>
    addSecret(secret, "detect-secrets")
  );
  gitleaksResults.secrets.forEach((secret) => addSecret(secret, "gitleaks"));

  // Convert map to array and sort by file and line number
  const mergedSecrets = Array.from(uniqueSecrets.values()).sort((a, b) => {
    if (a.file !== b.file) return a.file.localeCompare(b.file);
    return a.line - b.line;
  });

  return {
    secrets: mergedSecrets,
    missed_secrets: [
      ...detectSecretsResults.missed_secrets,
      ...gitleaksResults.missed_secrets,
    ],
    truncated: detectSecretsResults.truncated || gitleaksResults.truncated,
  };
}
// Public API bundled into a single object for consumers that use the
// module's default export.
const secureScanApi = {
  initialize,
  scanContent,
  scanFile,
  scanDirectory,
  scanWithBothScanners,
  runGitleaksScan,
  scanRemoteRepository,
  scanGitHistory,
};

export default secureScanApi;