UNPKG

secure-scan-js

Version:

A JavaScript implementation of Yelp's detect-secrets tool - no Python required

760 lines (672 loc) 21.8 kB
import { loadPyodide } from "pyodide";
import * as fs from "fs";
import * as path from "path";
import { ScanOptions, ScanResults, Secret } from "./types";
import {
  runGitleaksScan,
  scanRemoteRepository,
  scanGitHistory,
  enrichSecretsWithBlameInfo,
} from "./gitleaks";

// Declare Node.js globals
declare const __dirname: string;
declare const process: {
  cwd(): string;
  platform: string;
};

// Global Pyodide instance
let pyodideInstance: any = null;
let isInitialized = false;
// In-flight initialization shared by concurrent callers; null when idle.
let initializationPromise: Promise<void> | null = null;

// Constants
const DEFAULT_MAX_FILE_SIZE = 0; // No default file size limit (0 means no limit)

// Number of leading bytes inspected when sniffing for binary content
const BINARY_SNIFF_BYTES = 4096;

const BINARY_FILE_EXTENSIONS = [
  ".pack",
  ".gz",
  ".zip",
  ".jar",
  ".war",
  ".ear",
  ".class",
  ".so",
  ".dll",
  ".exe",
  ".obj",
  ".o",
  ".a",
  ".lib",
  ".pyc",
  ".pyo",
  ".jpg",
  ".jpeg",
  ".png",
  ".gif",
  ".bmp",
  ".ico",
  ".tif",
  ".tiff",
  ".mp3",
  ".mp4",
  ".avi",
  ".mov",
  ".wmv",
  ".flv",
  ".pdf",
  ".doc",
  ".docx",
  ".xls",
  ".xlsx",
  ".ppt",
  ".pptx",
];

// Directories skipped by scanDirectory in addition to options.excludeDirs
const DEFAULT_EXCLUDE_DIRS = [
  "node_modules",
  ".git",
  "dist",
  "build",
  "coverage",
  ".next",
  "__pycache__",
  ".venv",
  "venv",
  "env",
  ".env",
];

// File-name patterns skipped by scanDirectory in addition to options.excludeFiles
const DEFAULT_EXCLUDE_FILES = [
  "*.min.js",
  "*.min.css",
  "*.map",
  "*.lock",
  "package-lock.json",
  "yarn.lock",
  "pnpm-lock.yaml",
  "*.svg",
  "*.woff",
  "*.ttf",
  "*.eot",
  "*.jpg",
  "*.jpeg",
  "*.png",
  "*.gif",
  "*.ico",
  "*.pdf",
  "*.zip",
  "*.tar.gz",
];

// Python helper modules registered in the interpreter before the wrapper
// script runs. Order is preserved from the original load sequence; `label`
// is only used for the progress message.
const PYTHON_HELPER_MODULES = [
  { name: "secret_patterns", label: "secret patterns" },
  { name: "advanced_analyzer", label: "advanced analyzer" },
  { name: "enhanced_detector", label: "enhanced detector" },
  { name: "multi_language_detector", label: "multi-language detector" },
  { name: "heuristic_detector", label: "heuristic detector" },
  { name: "yelp_scanner", label: "yelp scanner" },
];

// Python snippet executed for every scan. It reads the js_* globals set by
// scanContent and always leaves a JSON string in the `js_result` global.
// Built line by line so the Python source starts at column 0.
const SCAN_FILE_SCRIPT = [
  "import json",
  "try:",
  "    result_json = scan_file(js_file_content, js_file_path, js_check_missed)",
  "    js_result = result_json",
  "except Exception as e:",
  "    import traceback",
  "    error_msg = traceback.format_exc()",
  '    print(f"Python error: {str(e)}\\n{error_msg}")',
  '    js_result = json.dumps({"error": str(e), "secrets": [], "missed_secrets": []})',
].join("\n");

/** Render any thrown value as a human-readable message. */
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/** Build a fresh, empty result object (never shared between callers). */
function emptyResults(): ScanResults {
  return { secrets: [], missed_secrets: [] };
}

/**
 * Check whether `filePath` contains `segment` as an intermediate directory,
 * accepting both "/" and "\" as separators.
 */
function hasPathSegment(filePath: string, segment: string): boolean {
  return filePath.replace(/\\/g, "/").includes(`/${segment}/`);
}

/**
 * Convert a simple glob (only `*` is special) into an anchored RegExp.
 * Every other regex metacharacter is escaped so that, for example, ".git"
 * matches only the literal name ".git".
 */
function globToRegExp(pattern: string): RegExp {
  const escaped = pattern
    .replace(/[.+?^${}()|[\]\\]/g, "\\$&")
    .replace(/\*/g, ".*");
  return new RegExp(`^${escaped}$`);
}

/** True when `name` matches at least one of the compiled patterns. */
function matchesAny(name: string, patterns: RegExp[]): boolean {
  return patterns.some((pattern) => pattern.test(name));
}

/**
 * Heuristically decide whether a file should be treated as binary and skipped.
 *
 * @param filePath Path of the file to inspect
 * @param fileSize Size of the file in bytes (as reported by `fs.statSync`)
 * @param options Scan options; only `includeNodeModules` is consulted
 * @returns `true` for known binary extensions, files inside `.git`,
 *   `node_modules` (unless included), `__pycache__` or `.next`, and files
 *   whose first bytes contain a NUL; `false` otherwise, including when the
 *   file cannot be read
 */
function isLikelyBinaryFile(
  filePath: string,
  fileSize: number,
  options?: Partial<ScanOptions>
): boolean {
  // Check file extension
  const ext = path.extname(filePath).toLowerCase();
  if (BINARY_FILE_EXTENSIONS.includes(ext)) {
    return true;
  }

  // Check if it's in a binary-like directory (either path separator)
  if (
    hasPathSegment(filePath, ".git") ||
    (!options?.includeNodeModules && hasPathSegment(filePath, "node_modules")) ||
    hasPathSegment(filePath, "__pycache__") ||
    hasPathSegment(filePath, ".next")
  ) {
    return true;
  }

  // Read a small chunk and look for NUL bytes, which often indicate binary data
  let fd: number | undefined;
  try {
    fd = fs.openSync(filePath, "r");
    const buffer = Buffer.alloc(Math.min(BINARY_SNIFF_BYTES, fileSize));
    const bytesRead = fs.readSync(fd, buffer, 0, buffer.length, 0);
    // Only inspect what was actually read: the rest of the buffer is
    // zero-filled and would otherwise look like binary content.
    return buffer.subarray(0, bytesRead).includes(0);
  } catch {
    // If we can't read the file, assume it's not binary
    return false;
  } finally {
    if (fd !== undefined) {
      try {
        fs.closeSync(fd);
      } catch {
        // Nothing useful can be done if closing fails
      }
    }
  }
}

/** Read a bundled Python source file from the `python` directory. */
function readPythonSource(moduleName: string): string {
  return fs.readFileSync(
    path.join(__dirname, "python", `${moduleName}.py`),
    "utf-8"
  );
}

/**
 * Register one bundled Python file as an importable module inside Pyodide.
 * Sets the `<name>_code` global, creates a `<name>` module object in the
 * Python globals and in `sys.modules`, then executes the source in it.
 */
async function registerPythonModule(moduleName: string): Promise<void> {
  pyodideInstance.globals.set(
    `${moduleName}_code`,
    readPythonSource(moduleName)
  );
  // moduleName comes from the hard-coded PYTHON_HELPER_MODULES list, so
  // interpolating it into Python source is safe.
  const loaderScript = [
    "import sys",
    "from types import ModuleType",
    `${moduleName} = ModuleType('${moduleName}')`,
    `sys.modules['${moduleName}'] = ${moduleName}`,
    `exec(${moduleName}_code, ${moduleName}.__dict__)`,
  ].join("\n");
  await pyodideInstance.runPythonAsync(loaderScript);
}

/** Perform the actual one-time setup; see `initialize` for the public entry. */
async function runInitialization(): Promise<void> {
  try {
    // Load Pyodide - use the default CDN path
    console.log("Loading Pyodide...");
    pyodideInstance = await loadPyodide();

    // Load micropip package
    console.log("Loading micropip...");
    await pyodideInstance.loadPackage(["micropip", "packaging"]);

    // Register the helper modules, in order
    for (const { name, label } of PYTHON_HELPER_MODULES) {
      console.log(`Loading ${label} module...`);
      await registerPythonModule(name);
    }

    // Run the wrapper script in the Python globals
    console.log("Setting up Python environment...");
    await pyodideInstance.runPythonAsync(
      readPythonSource("detect_secrets_wrapper")
    );

    // Initialize the scanner (install dependencies)
    console.log("Installing Python dependencies (this may take a moment)...");
    await pyodideInstance.runPythonAsync("await initialize()");

    isInitialized = true;
    console.log("Initialization complete");
  } catch (error: unknown) {
    console.error("Failed to initialize:", error);
    throw new Error(
      `Failed to initialize WebAssembly module: ${describeError(error)}`
    );
  }
}

/**
 * Initialize the WebAssembly module and Python environment.
 *
 * Safe to call repeatedly and concurrently: once initialization has
 * succeeded this returns immediately, and callers arriving while it is in
 * progress share the same promise, so they also observe a failure instead
 * of silently continuing with an uninitialized scanner. After a failure the
 * next call starts a fresh attempt.
 *
 * @throws Error prefixed with "Failed to initialize WebAssembly module:"
 */
export async function initialize(): Promise<void> {
  if (isInitialized) return;

  let pending = initializationPromise;
  if (pending === null) {
    pending = runInitialization().finally(() => {
      initializationPromise = null;
    });
    initializationPromise = pending;
  }
  return pending;
}

/**
 * Scan a file or string content for secrets
 * @param content The file content to scan
 * @param filePath The path of the file (for reporting)
 * @param options Scan options (`maxFileSize` and `checkMissed` are used here)
 * @returns Scan results; `truncated` is set when the content was cut down
 *   to `maxFileSize`
 * @throws Error prefixed with "Failed to scan content:" when the Python
 *   scanner reports an error or returns nothing
 */
export async function scanContent(
  content: string,
  filePath: string,
  options: Partial<ScanOptions> = {}
): Promise<ScanResults> {
  if (!isInitialized) {
    await initialize();
  }

  try {
    // Get the max file size from options
    const maxFileSize = options.maxFileSize ?? DEFAULT_MAX_FILE_SIZE;

    // If max file size is set and content is too large, truncate it to prevent memory issues
    let truncated = false;
    if (maxFileSize > 0 && content.length > maxFileSize) {
      content = content.substring(0, maxFileSize);
      truncated = true;
      console.warn(
        `File ${filePath} is too large, scanning only the first ${maxFileSize} bytes`
      );
    }

    // Convert options to a Python-compatible format
    const checkMissed = Boolean(options.checkMissed);

    // Set up Python variables
    pyodideInstance.globals.set("js_file_content", content);
    pyodideInstance.globals.set("js_file_path", filePath);
    pyodideInstance.globals.set("js_check_missed", checkMissed);

    // Call the Python scan_file function
    await pyodideInstance.runPythonAsync(SCAN_FILE_SCRIPT);

    // Get the result from Python
    const resultJson = pyodideInstance.globals.get("js_result");

    // Check if we got a valid result
    if (!resultJson) {
      throw new Error("No result returned from Python scanner");
    }

    // Parse the results
    const results = JSON.parse(resultJson);

    // Check if there was an error
    if (results.error) {
      throw new Error(`Python error: ${results.error}`);
    }

    // Add a note if the file was truncated
    if (truncated) {
      results.truncated = true;
    }

    return results;
  } catch (error: unknown) {
    console.error("Error scanning content:", error);
    throw new Error(`Failed to scan content: ${describeError(error)}`);
  }
}

/**
 * Scan a file for secrets
 *
 * Files under `node_modules` (unless `includeNodeModules`), files under
 * `.next`, directories, likely-binary files, over-limit files (when
 * `limitFileSize` is set) and unreadable files all yield empty results.
 *
 * @param filePath The path of the file to scan
 * @param options Scan options
 * @returns Scan results
 * @throws Whatever `scanContent` throws; only filesystem errors are
 *   converted into an empty result
 */
export async function scanFile(
  filePath: string,
  options: Partial<ScanOptions> = {}
): Promise<ScanResults> {
  let content: string;

  try {
    // Skip node_modules files unless explicitly included
    if (hasPathSegment(filePath, "node_modules") && !options.includeNodeModules) {
      if (options.verbose) {
        console.log(`Skipping node_modules file: ${filePath}`);
      }
      return emptyResults();
    }

    // Skip .next build files
    if (hasPathSegment(filePath, ".next")) {
      if (options.verbose) {
        console.log(`Skipping Next.js build file: ${filePath}`);
      }
      return emptyResults();
    }

    // Get file stats
    const stats = fs.statSync(filePath);

    // Skip directories
    if (stats.isDirectory()) {
      return emptyResults();
    }

    // Check if it's a binary file
    if (isLikelyBinaryFile(filePath, stats.size, options)) {
      console.log(`Skipping likely binary file: ${filePath}`);
      return emptyResults();
    }

    // Skip large files if a limit is set and limitFileSize option is true
    const maxFileSize = options.maxFileSize ?? DEFAULT_MAX_FILE_SIZE;
    if (maxFileSize > 0 && stats.size > maxFileSize && options.limitFileSize) {
      console.log(
        `Skipping large file (${Math.round(stats.size / 1024)}KB): ${filePath}`
      );
      return emptyResults();
    }

    content = fs.readFileSync(filePath, "utf-8");
  } catch (error: unknown) {
    console.warn(`Skipping file ${filePath}: ${describeError(error)}`);
    return emptyResults();
  }

  // Scanner failures are intentionally left outside the try above so they
  // reach the caller rather than being reported as "no secrets found".
  return scanContent(content, filePath, options);
}

/**
 * Recursively collect the files below `dir` into `found`, skipping
 * directories and files whose names match the given patterns. Unreadable
 * directories are reported with a warning and skipped.
 */
function collectFiles(
  dir: string,
  excludedDirs: RegExp[],
  excludedFiles: RegExp[],
  found: string[]
): void {
  try {
    for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
      const fullPath = path.join(dir, entry.name);

      if (entry.isDirectory()) {
        if (!matchesAny(entry.name, excludedDirs)) {
          collectFiles(fullPath, excludedDirs, excludedFiles, found);
        }
      } else if (!matchesAny(entry.name, excludedFiles)) {
        found.push(fullPath);
      }
    }
  } catch (err: unknown) {
    console.warn(`Error reading directory ${dir}: ${describeError(err)}`);
  }
}

/**
 * Scan a directory for secrets
 *
 * `options.excludeDirs` and `options.excludeFiles` are added to the built-in
 * exclusion lists. Patterns are matched against the entry name only (not
 * the full path) and `*` is the only wildcard.
 *
 * @param directory The directory to scan
 * @param options Scan options
 * @returns Scan results with every secret tagged `detectedBy: "detect-secrets"`
 */
export async function scanDirectory(
  directory: string = process.cwd(),
  options: Partial<ScanOptions> = {}
): Promise<ScanResults> {
  if (!isInitialized) {
    await initialize();
  }

  const results: ScanResults = emptyResults();

  // Compile the exclusion patterns once instead of once per directory entry
  const excludedDirs = [
    ...DEFAULT_EXCLUDE_DIRS,
    ...(options.excludeDirs || []),
  ].map(globToRegExp);
  const excludedFiles = [
    ...DEFAULT_EXCLUDE_FILES,
    ...(options.excludeFiles || []),
  ].map(globToRegExp);

  // Get all files in the directory
  const files: string[] = [];
  collectFiles(directory, excludedDirs, excludedFiles, files);

  // Track if any files were truncated
  let anyTruncated = false;

  // Scan each file with error handling for individual files
  for (const file of files) {
    try {
      const fileResults = await scanFile(file, options);

      // Check if this file was truncated
      if (fileResults.truncated) {
        anyTruncated = true;
      }

      results.secrets = results.secrets.concat(fileResults.secrets);
      results.missed_secrets = results.missed_secrets.concat(
        fileResults.missed_secrets
      );
    } catch (error: unknown) {
      console.warn(`Skipping file ${file}: ${describeError(error)}`);
    }
  }

  // Set the truncated flag if any files were truncated
  if (anyTruncated) {
    results.truncated = true;
  }

  // Blame enrichment is on by default; it is skipped only for an explicit
  // falsy enrichWithGitInfo value.
  const shouldEnrich =
    options.enrichWithGitInfo === undefined ||
    Boolean(options.enrichWithGitInfo);

  if (results.secrets.length > 0 && shouldEnrich) {
    try {
      console.log("Enriching secrets with git blame information...");
      results.secrets = await enrichSecretsWithBlameInfo(
        results.secrets,
        options
      );
      console.log("Git blame enrichment complete");
    } catch (error) {
      console.warn(`Failed to enrich secrets with git blame info: ${error}`);
    }
  }

  // Add detectedBy field to all secrets
  results.secrets = results.secrets.map((secret) => ({
    ...secret,
    detectedBy: "detect-secrets",
  }));

  // The results are deliberately not logged here: they describe detected
  // secrets and should not be written to stdout as a side effect.
  return results;
}

/**
 * Scan a directory using both detect-secrets and Gitleaks
 *
 * Both scanners run in parallel; a scanner that fails contributes no
 * findings. Findings are de-duplicated by normalized file path and line
 * number, with their `types` merged. `detectedBy` is "both" only when the
 * two different scanners reported the same location.
 *
 * @param directory The directory to scan
 * @param options Scan options
 * @returns Combined scan results, sorted by file and then line
 */
export async function scanWithBothScanners(
  directory: string = process.cwd(),
  options: Partial<ScanOptions> = {}
): Promise<ScanResults> {
  console.log(
    "Starting both detect-secrets and Gitleaks scanners in parallel..."
  );

  const runDetectSecrets = async (): Promise<ScanResults> => {
    try {
      console.log("Running detect-secrets scanner...");
      const results = await scanDirectory(directory, options);
      console.log(`detect-secrets found ${results.secrets.length} secrets`);
      return results;
    } catch (error) {
      console.error(`detect-secrets scan failed: ${describeError(error)}`);
      return emptyResults();
    }
  };

  const runGitleaks = async (): Promise<ScanResults> => {
    try {
      console.log("Running Gitleaks scanner...");
      const results = await runGitleaksScan(directory, options);
      console.log(`Gitleaks found ${results.secrets.length} secrets`);
      return results;
    } catch (error) {
      if (
        error instanceof Error &&
        error.message.includes("Gitleaks is not installed")
      ) {
        console.error(
          `Gitleaks is not installed. Please install it following the instructions at https://github.com/zricethezav/gitleaks#installation`
        );
      } else {
        console.error(`Gitleaks scan failed: ${describeError(error)}`);
      }
      return emptyResults();
    }
  };

  // Run both scanners in parallel
  const [detectSecretsResults, gitleaksResults] = await Promise.all([
    runDetectSecrets(),
    runGitleaks(),
  ]);

  // Create a map to store unique secrets
  const uniqueSecrets = new Map<string, Secret>();

  // Helper function to normalize file paths to use forward slashes
  const normalizePath = (filePath: string): string =>
    filePath.replace(/\\/g, "/");

  // Helper function to create a unique key for a secret (file + line)
  const createSecretKey = (secret: Secret): string =>
    `${normalizePath(secret.file)}:${secret.line}`;

  // Helper function to merge secret types while preserving all unique types
  const mergeSecretTypes = (types1: string[], types2: string[]): string[] =>
    [...new Set([...types1, ...types2])].sort();

  // Insert a finding, merging it into an existing entry for the same location
  const addSecret = (
    secret: Secret,
    source: "detect-secrets" | "gitleaks"
  ): void => {
    const key = createSecretKey(secret);
    const existing = uniqueSecrets.get(key);

    if (existing === undefined) {
      uniqueSecrets.set(key, {
        ...secret,
        file: normalizePath(secret.file), // Normalize the path
        detectedBy: source,
      });
      return;
    }

    uniqueSecrets.set(key, {
      ...existing,
      types: mergeSecretTypes(existing.types ?? [], secret.types ?? []),
      // Two findings from the same scanner on one line are not "both"
      detectedBy: existing.detectedBy === source ? source : "both",
      // Prefer metadata from the newer finding when it is available
      author: secret.author || existing.author,
      email: secret.email || existing.email,
      date: secret.date || existing.date,
      commit: secret.commit || existing.commit,
      message: secret.message || existing.message,
      hashed_secret: secret.hashed_secret || existing.hashed_secret,
    });
  };

  // Add detect-secrets results first, then Gitleaks results
  for (const secret of detectSecretsResults.secrets) {
    addSecret(secret, "detect-secrets");
  }
  for (const secret of gitleaksResults.secrets) {
    addSecret(secret, "gitleaks");
  }

  // Convert map to array and sort by file and line number
  const mergedSecrets = Array.from(uniqueSecrets.values()).sort((a, b) => {
    if (a.file !== b.file) return a.file.localeCompare(b.file);
    return a.line - b.line;
  });

  return {
    secrets: mergedSecrets,
    missed_secrets: [
      ...detectSecretsResults.missed_secrets,
      ...gitleaksResults.missed_secrets,
    ],
    truncated: detectSecretsResults.truncated || gitleaksResults.truncated,
  };
}

export default {
  initialize,
  scanContent,
  scanFile,
  scanDirectory,
  scanWithBothScanners,
  runGitleaksScan,
  scanRemoteRepository,
  scanGitHistory,
};