UNPKG

detect-secrets-js

Version:

A JavaScript implementation of Yelp's detect-secrets tool - no Python required

647 lines (575 loc) 17.6 kB
import { loadPyodide } from "pyodide";
import * as fs from "fs";
import * as path from "path";
import { ScanOptions, ScanResults } from "./types";
import {
  runGitleaksScan,
  scanRemoteRepository,
  scanGitHistory,
  enrichSecretsWithBlameInfo,
} from "./gitleaks";

// Declare Node.js globals
declare const __dirname: string;
declare const process: {
  cwd(): string;
  platform: string;
};

/** Element type of `ScanResults.secrets`, derived so it cannot drift from `./types`. */
type Secret = ScanResults["secrets"][number];

// Global Pyodide instance (only assigned once initialization fully succeeds)
let pyodideInstance: any = null;
let isInitialized = false;
// In-flight initialization shared by concurrent `initialize()` callers.
let initializationPromise: Promise<void> | null = null;

// Constants
const DEFAULT_MAX_FILE_SIZE = 0; // No default file size limit (0 means no limit)

// Number of leading bytes sampled when sniffing a file for binary content.
const BINARY_SAMPLE_BYTES = 4096;

const BINARY_FILE_EXTENSIONS = [
  ".pack",
  ".gz",
  ".zip",
  ".jar",
  ".war",
  ".ear",
  ".class",
  ".so",
  ".dll",
  ".exe",
  ".obj",
  ".o",
  ".a",
  ".lib",
  ".pyc",
  ".pyo",
  ".jpg",
  ".jpeg",
  ".png",
  ".gif",
  ".bmp",
  ".ico",
  ".tif",
  ".tiff",
  ".mp3",
  ".mp4",
  ".avi",
  ".mov",
  ".wmv",
  ".flv",
  ".pdf",
  ".doc",
  ".docx",
  ".xls",
  ".xlsx",
  ".ppt",
  ".pptx",
];

// Python glue executed for every scan. It reads the `js_*` globals set by
// `scanContent` and always leaves a JSON string in `js_result`, even when the
// scanner raises. Built line by line because Python is whitespace-sensitive.
const PYTHON_SCAN_SNIPPET = [
  "import json",
  "try:",
  "    result_json = scan_file(js_file_content, js_file_path, js_check_missed)",
  "    js_result = result_json",
  "except Exception as e:",
  "    import traceback",
  "    error_msg = traceback.format_exc()",
  '    print(f"Python error: {str(e)}\\n{error_msg}")',
  '    js_result = json.dumps({"error": str(e), "secrets": [], "missed_secrets": []})',
].join("\n");

/** Extract a printable message from an arbitrary thrown value. */
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/** Build a fresh, empty result object (never shared, so callers may mutate it). */
function emptyResults(): ScanResults {
  return { secrets: [], missed_secrets: [] };
}

/**
 * Report whether `filePath` has `directoryName` as an intermediate path
 * segment. Both `/` and `\` are treated as separators on every platform.
 */
function pathContainsDirectory(
  filePath: string,
  directoryName: string
): boolean {
  return filePath.replace(/\\/g, "/").includes(`/${directoryName}/`);
}

/**
 * Compile a simple glob (only `*` is a wildcard) into an anchored RegExp.
 * Every other character is matched literally, so names such as `.git` or
 * `c++` cannot be misread as regular-expression syntax.
 */
function globToRegExp(pattern: string): RegExp {
  const source = pattern
    .split("*")
    .map((literal) => literal.replace(/[.+?^${}()|[\]\\]/g, "\\$&"))
    .join(".*");
  return new RegExp(`^${source}$`);
}

/** Key used to treat two findings as the same secret: file plus line. */
function secretKey(secret: Secret): string {
  return `${secret.file}:${secret.line}`;
}

/**
 * Heuristically decide whether a file should be skipped as binary.
 *
 * A file counts as binary when its extension is in `BINARY_FILE_EXTENSIONS`,
 * when it lives under `.git`, `__pycache__`, `.next` or (unless
 * `options.includeNodeModules` is set) `node_modules`, or when the first
 * `BINARY_SAMPLE_BYTES` bytes contain a NUL byte.
 *
 * @param filePath Path of the file to inspect
 * @param fileSize Size of the file in bytes (used to size the sample buffer)
 * @param options Scan options; only `includeNodeModules` is consulted
 * @returns `true` if the file looks binary; `false` otherwise, including when
 *          the file cannot be read
 */
function isLikelyBinaryFile(
  filePath: string,
  fileSize: number,
  options?: Partial<ScanOptions>
): boolean {
  // Check file extension
  const ext = path.extname(filePath).toLowerCase();
  if (BINARY_FILE_EXTENSIONS.includes(ext)) {
    return true;
  }

  // Check if it's in a binary-like directory
  if (
    pathContainsDirectory(filePath, ".git") ||
    (!options?.includeNodeModules &&
      pathContainsDirectory(filePath, "node_modules")) ||
    pathContainsDirectory(filePath, "__pycache__") ||
    pathContainsDirectory(filePath, ".next")
  ) {
    return true;
  }

  // An empty file has nothing to sample.
  const sampleSize = Math.min(BINARY_SAMPLE_BYTES, fileSize);
  if (sampleSize === 0) {
    return false;
  }

  // Read a small chunk and look for NUL bytes, which often indicate binary data
  try {
    const buffer = Buffer.alloc(sampleSize);
    const fd = fs.openSync(filePath, "r");
    let bytesRead = 0;
    try {
      bytesRead = fs.readSync(fd, buffer, 0, buffer.length, 0);
    } finally {
      // Always release the descriptor, even if the read throws.
      fs.closeSync(fd);
    }
    // Only inspect what was actually read: the buffer is zero-filled, so a
    // short read would otherwise look like trailing NUL bytes.
    return buffer.subarray(0, bytesRead).includes(0);
  } catch {
    // If we can't read the file, assume it's not binary
    return false;
  }
}

/**
 * Load Pyodide, the Python wrapper module and its dependencies, then publish
 * the ready instance through the module-level state.
 */
async function loadPythonEnvironment(): Promise<void> {
  try {
    // Load Pyodide with its default options
    console.log("Loading Pyodide...");
    const instance = await loadPyodide();

    // Load micropip package
    console.log("Loading micropip...");
    await instance.loadPackage(["micropip", "packaging"]);

    // Load the Python wrapper module
    console.log("Setting up Python environment...");
    const pythonCode = fs.readFileSync(
      path.join(__dirname, "python", "detect_secrets_wrapper.py"),
      "utf-8"
    );

    // Run the Python code to define the module
    await instance.runPythonAsync(pythonCode);

    // Run the wrapper's own `initialize()` coroutine
    console.log("Installing Python dependencies (this may take a moment)...");
    await instance.runPythonAsync("await initialize()");

    // Publish only a fully prepared instance.
    pyodideInstance = instance;
    isInitialized = true;
    console.log("Initialization complete");
  } catch (error: unknown) {
    console.error("Failed to initialize:", error);
    throw new Error(
      `Failed to initialize WebAssembly module: ${describeError(error)}`
    );
  }
}

/**
 * Initialize the WebAssembly module and Python environment.
 *
 * Safe to call repeatedly and concurrently: callers that arrive while an
 * initialization is in flight share its promise, so they all observe the same
 * success or failure. After a failure the next call starts a fresh attempt.
 *
 * @throws Error if Pyodide or the Python wrapper cannot be set up
 */
export async function initialize(): Promise<void> {
  if (isInitialized) return;

  if (initializationPromise === null) {
    initializationPromise = loadPythonEnvironment().finally(() => {
      initializationPromise = null;
    });
  }
  await initializationPromise;
}

/**
 * Scan a file or string content for secrets
 * @param content The file content to scan
 * @param filePath The path of the file (for reporting)
 * @param options Scan options (`maxFileSize` and `checkMissed` are used here)
 * @returns Scan results; `truncated` is set when the content was cut down to
 *          `maxFileSize`
 * @throws Error if initialization fails or the Python scanner reports an error
 */
export async function scanContent(
  content: string,
  filePath: string,
  options: Partial<ScanOptions> = {}
): Promise<ScanResults> {
  if (!isInitialized) {
    await initialize();
  }

  try {
    // Get the max file size from options
    const maxFileSize = options.maxFileSize ?? DEFAULT_MAX_FILE_SIZE;

    // If a limit is set and the content is too large, truncate it to prevent
    // memory issues. The limit is compared against the string length.
    let truncated = false;
    if (maxFileSize > 0 && content.length > maxFileSize) {
      content = content.substring(0, maxFileSize);
      truncated = true;
      console.warn(
        `File ${filePath} is too large, scanning only the first ${maxFileSize} bytes`
      );
    }

    // Hand the inputs to Python through interpreter globals
    pyodideInstance.globals.set("js_file_content", content);
    pyodideInstance.globals.set("js_file_path", filePath);
    pyodideInstance.globals.set("js_check_missed", Boolean(options.checkMissed));

    // Call the Python scan_file function
    await pyodideInstance.runPythonAsync(PYTHON_SCAN_SNIPPET);

    // Get the result from Python
    const resultJson = pyodideInstance.globals.get("js_result");
    if (!resultJson) {
      throw new Error("No result returned from Python scanner");
    }

    // Parse the results
    const results = JSON.parse(resultJson);

    // The Python snippet reports its own failures through an `error` field
    if (results.error) {
      throw new Error(`Python error: ${results.error}`);
    }

    // Add a note if the file was truncated
    if (truncated) {
      results.truncated = true;
    }

    return results;
  } catch (error: unknown) {
    console.error("Error scanning content:", error);
    throw new Error(`Failed to scan content: ${describeError(error)}`);
  }
}

/**
 * Scan a file for secrets
 *
 * Files under `node_modules` (unless `options.includeNodeModules`), files
 * under `.next`, directories, likely-binary files and - when
 * `options.limitFileSize` is set - files larger than `options.maxFileSize`
 * are skipped and yield an empty result. Failures while inspecting or reading
 * the file are logged and also yield an empty result.
 *
 * @param filePath The path of the file to scan
 * @param options Scan options
 * @returns Scan results
 */
export async function scanFile(
  filePath: string,
  options: Partial<ScanOptions> = {}
): Promise<ScanResults> {
  try {
    // Skip node_modules files unless explicitly included
    if (
      !options.includeNodeModules &&
      pathContainsDirectory(filePath, "node_modules")
    ) {
      if (options.verbose) {
        console.log(`Skipping node_modules file: ${filePath}`);
      }
      return emptyResults();
    }

    // Skip .next build files
    if (pathContainsDirectory(filePath, ".next")) {
      if (options.verbose) {
        console.log(`Skipping Next.js build file: ${filePath}`);
      }
      return emptyResults();
    }

    // Get file stats
    const stats = fs.statSync(filePath);

    // Skip directories
    if (stats.isDirectory()) {
      return emptyResults();
    }

    // Check if it's a binary file
    if (isLikelyBinaryFile(filePath, stats.size, options)) {
      console.log(`Skipping likely binary file: ${filePath}`);
      return emptyResults();
    }

    // Skip large files if a limit is set and limitFileSize option is true
    const maxFileSize = options.maxFileSize ?? DEFAULT_MAX_FILE_SIZE;
    if (options.limitFileSize && maxFileSize > 0 && stats.size > maxFileSize) {
      console.log(
        `Skipping large file (${Math.round(stats.size / 1024)}KB): ${filePath}`
      );
      return emptyResults();
    }

    // Read and scan the file
    const content = fs.readFileSync(filePath, "utf-8");
    // NOTE(review): deliberately not awaited, matching existing behaviour - a
    // rejected scan propagates to the caller instead of being reported as
    // "no secrets" by the catch block below.
    return scanContent(content, filePath, options);
  } catch (error: unknown) {
    console.warn(`Skipping file ${filePath}: ${describeError(error)}`);
    return emptyResults();
  }
}

/**
 * Scan a directory for secrets
 *
 * Walks `directory` recursively, skipping the built-in exclusion lists plus
 * `options.excludeDirs` / `options.excludeFiles` (glob patterns where `*` is
 * the only wildcard, matched against the entry name), and scans each
 * remaining file with `scanFile`. Unless `options.enrichWithGitInfo` is falsy
 * and not `undefined`, found secrets are passed through
 * `enrichSecretsWithBlameInfo`.
 *
 * @param directory The directory to scan
 * @param options Scan options
 * @returns Scan results
 */
export async function scanDirectory(
  directory: string = process.cwd(),
  options: Partial<ScanOptions> = {}
): Promise<ScanResults> {
  if (!isInitialized) {
    await initialize();
  }

  const results: ScanResults = emptyResults();

  // Directories that are always excluded (user patterns are added to these)
  const defaultExcludeDirs = [
    "node_modules",
    ".git",
    "dist",
    "build",
    "coverage",
    ".next",
    "__pycache__",
    ".venv",
    "venv",
    "env",
    ".env",
  ];

  // File patterns that are always excluded (user patterns are added to these)
  const defaultExcludeFiles = [
    "*.min.js",
    "*.min.css",
    "*.map",
    "*.lock",
    "package-lock.json",
    "yarn.lock",
    "pnpm-lock.yaml",
    "*.svg",
    "*.woff",
    "*.ttf",
    "*.eot",
    "*.jpg",
    "*.jpeg",
    "*.png",
    "*.gif",
    "*.ico",
    "*.pdf",
    "*.zip",
    "*.tar.gz",
  ];

  // Compile the exclusion patterns once instead of once per directory entry.
  const excludedDirMatchers = [
    ...defaultExcludeDirs,
    ...(options.excludeDirs || []),
  ].map(globToRegExp);
  const excludedFileMatchers = [
    ...defaultExcludeFiles,
    ...(options.excludeFiles || []),
  ].map(globToRegExp);

  const matchesAny = (matchers: RegExp[], name: string): boolean =>
    matchers.some((matcher) => matcher.test(name));

  // Recursively append every non-excluded file below `dir` to `found`.
  const collectFiles = (dir: string, found: string[]): string[] => {
    try {
      for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
        const fullPath = path.join(dir, entry.name);

        if (entry.isDirectory()) {
          if (!matchesAny(excludedDirMatchers, entry.name)) {
            collectFiles(fullPath, found);
          }
        } else if (!matchesAny(excludedFileMatchers, entry.name)) {
          found.push(fullPath);
        }
      }
    } catch (err: unknown) {
      // An unreadable directory is reported and skipped; files gathered so far are kept.
      console.warn(`Error reading directory ${dir}: ${describeError(err)}`);
    }
    return found;
  };

  const files = collectFiles(directory, []);

  // Track if any files were truncated
  let anyTruncated = false;

  // Scan files one at a time (scanContent passes its inputs through shared
  // Pyodide globals), with error handling for individual files
  for (const file of files) {
    try {
      const fileResults = await scanFile(file, options);

      if (fileResults.truncated) {
        anyTruncated = true;
      }

      results.secrets = results.secrets.concat(fileResults.secrets);
      results.missed_secrets = results.missed_secrets.concat(
        fileResults.missed_secrets
      );
    } catch (error: unknown) {
      console.warn(`Skipping file ${file}: ${describeError(error)}`);
    }
  }

  // Set the truncated flag if any files were truncated
  if (anyTruncated) {
    results.truncated = true;
  }

  // If we have secrets and the enrichWithGitInfo option is true (or unset), add blame info
  if (
    results.secrets.length > 0 &&
    (options.enrichWithGitInfo || options.enrichWithGitInfo === undefined)
  ) {
    try {
      console.log("Enriching secrets with git blame information...");
      results.secrets = await enrichSecretsWithBlameInfo(
        results.secrets,
        options
      );
      console.log("Git blame enrichment complete");
    } catch (error: unknown) {
      console.warn(`Failed to enrich secrets with git blame info: ${error}`);
    }
  }

  return results;
}

/**
 * Scan a directory using both detect-secrets and Gitleaks
 *
 * Both scanners run concurrently; a scanner that fails is logged and
 * contributes an empty result. Findings are de-duplicated by file and line.
 * When both scanners report the same location, the detect-secrets entry is
 * kept and extended with the Gitleaks types, hash and git metadata. Unless
 * `options.enrichWithGitInfo` is `false`, secrets without a known author are
 * passed through `enrichSecretsWithBlameInfo`.
 *
 * @param directory The directory to scan
 * @param options Scan options
 * @returns Combined scan results
 */
export async function scanWithBothScanners(
  directory: string = process.cwd(),
  options: Partial<ScanOptions> = {}
): Promise<ScanResults> {
  console.log(
    "Starting both detect-secrets and Gitleaks scanners in parallel..."
  );

  const runDetectSecrets = async (): Promise<ScanResults> => {
    try {
      console.log("Running detect-secrets scanner...");
      const results = await scanDirectory(directory, options);
      console.log(`detect-secrets found ${results.secrets.length} secrets`);
      return results;
    } catch (error: unknown) {
      console.error(`detect-secrets scan failed: ${describeError(error)}`);
      // Return empty results if detect-secrets fails
      return emptyResults();
    }
  };

  const runGitleaks = async (): Promise<ScanResults> => {
    try {
      console.log("Running Gitleaks scanner...");
      const results = await runGitleaksScan(directory, options);
      console.log(`Gitleaks found ${results.secrets.length} secrets`);
      return results;
    } catch (error: unknown) {
      // If Gitleaks is not installed, provide helpful instruction
      if (
        error instanceof Error &&
        error.message.includes("Gitleaks is not installed")
      ) {
        console.error(
          `Gitleaks is not installed. Please install it following the instructions at https://github.com/zricethezav/gitleaks#installation`
        );
      } else {
        console.error(`Gitleaks scan failed: ${describeError(error)}`);
      }
      // Return empty results if Gitleaks fails
      return emptyResults();
    }
  };

  // Run both scanners in parallel
  const [detectSecretsResults, gitleaksResults] = await Promise.all([
    runDetectSecrets(),
    runGitleaks(),
  ]);

  const uniqueSecrets = new Map<string, Secret>();

  // Add detect-secrets results first (shallow copies, so merging below does
  // not mutate the scanners' own result objects)
  for (const secret of detectSecretsResults.secrets) {
    uniqueSecrets.set(secretKey(secret), { ...secret });
  }

  // Add Gitleaks results, merging with existing entries if they exist
  for (const secret of gitleaksResults.secrets) {
    const key = secretKey(secret);
    const existing = uniqueSecrets.get(key);

    if (existing === undefined) {
      uniqueSecrets.set(key, { ...secret });
      continue;
    }

    // Union of both type lists; tolerate a scanner that omitted `types`
    existing.types = [
      ...new Set([...(existing.types ?? []), ...(secret.types ?? [])]),
    ];
    if (secret.hashed_secret) existing.hashed_secret = secret.hashed_secret;

    // Merge git information if available from Gitleaks
    if (secret.author) existing.author = secret.author;
    if (secret.email) existing.email = secret.email;
    if (secret.date) existing.date = secret.date;
    if (secret.commit) existing.commit = secret.commit;
    if (secret.message) existing.message = secret.message;
  }

  let mergedSecrets = Array.from(uniqueSecrets.values());

  // Fill in blame information for secrets that still lack an author
  if (options.enrichWithGitInfo !== false) {
    const secretsNeedingEnrichment = mergedSecrets.filter(
      (secret) => !secret.author || secret.author === "Unknown"
    );

    if (secretsNeedingEnrichment.length > 0) {
      console.log(
        `Enriching ${secretsNeedingEnrichment.length} secrets with git blame information...`
      );

      try {
        const enrichedSecrets = await enrichSecretsWithBlameInfo(
          secretsNeedingEnrichment,
          options
        );

        // Index enriched secrets by location; the first entry per key wins
        const enrichedByKey = new Map<string, Secret>();
        for (const enriched of enrichedSecrets) {
          const key = secretKey(enriched);
          if (!enrichedByKey.has(key)) {
            enrichedByKey.set(key, enriched);
          }
        }

        // Replace the original secrets with the enriched ones
        mergedSecrets = mergedSecrets.map(
          (secret) => enrichedByKey.get(secretKey(secret)) ?? secret
        );

        console.log("Git blame enrichment complete");
      } catch (error: unknown) {
        console.warn(
          `Failed to enrich some secrets with git blame info: ${error}`
        );
      }
    }
  }

  console.log(`Merged results: ${mergedSecrets.length} unique secrets found`);

  return {
    secrets: mergedSecrets,
    missed_secrets: [
      ...detectSecretsResults.missed_secrets,
      ...gitleaksResults.missed_secrets,
    ],
    truncated: detectSecretsResults.truncated,
  };
}

export default {
  initialize,
  scanContent,
  scanFile,
  scanDirectory,
  scanWithBothScanners,
  runGitleaksScan,
  scanRemoteRepository,
  scanGitHistory,
};