UNPKG

secure-scan-js

Version:

A JavaScript implementation of Yelp's detect-secrets tool - no Python required

625 lines (617 loc) 24.2 kB
"use strict"; var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { if (k2 === undefined) k2 = k; var desc = Object.getOwnPropertyDescriptor(m, k); if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { desc = { enumerable: true, get: function() { return m[k]; } }; } Object.defineProperty(o, k2, desc); }) : (function(o, m, k, k2) { if (k2 === undefined) k2 = k; o[k2] = m[k]; })); var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { Object.defineProperty(o, "default", { enumerable: true, value: v }); }) : function(o, v) { o["default"] = v; }); var __importStar = (this && this.__importStar) || (function () { var ownKeys = function(o) { ownKeys = Object.getOwnPropertyNames || function (o) { var ar = []; for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k; return ar; }; return ownKeys(o); }; return function (mod) { if (mod && mod.__esModule) return mod; var result = {}; if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]); __setModuleDefault(result, mod); return result; }; })(); Object.defineProperty(exports, "__esModule", { value: true }); exports.initialize = initialize; exports.scanContent = scanContent; exports.scanFile = scanFile; exports.scanDirectory = scanDirectory; exports.scanWithBothScanners = scanWithBothScanners; const pyodide_1 = require("pyodide"); const fs = __importStar(require("fs")); const path = __importStar(require("path")); const gitleaks_1 = require("./gitleaks"); // Global Pyodide instance let pyodideInstance = null; let isInitialized = false; let isInitializing = false; // Constants const DEFAULT_MAX_FILE_SIZE = 0; // No default file size limit (0 means no limit) const BINARY_FILE_EXTENSIONS = [ ".pack", ".gz", ".zip", ".jar", ".war", ".ear", ".class", ".so", ".dll", ".exe", ".obj", ".o", ".a", ".lib", ".pyc", ".pyo", ".jpg", ".jpeg", ".png", ".gif", ".bmp", ".ico", ".tif", ".tiff", ".mp3", ".mp4", ".avi", ".mov", ".wmv", ".flv", ".pdf", ".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx", ]; // Helper function to check if a file is likely binary function isLikelyBinaryFile(filePath, fileSize, options) { // Check file extension const ext = path.extname(filePath).toLowerCase(); if (BINARY_FILE_EXTENSIONS.includes(ext)) { return true; } // Check if it's in a binary-like directory if (filePath.includes("/.git/") || (!options?.includeNodeModules && (filePath.includes("/node_modules/") || filePath.includes("\\node_modules\\"))) || filePath.includes("/__pycache__/") || filePath.includes("/.next/") || filePath.includes("\\.next\\")) { return true; } // Try to read a small chunk to detect binary content try { const fd = fs.openSync(filePath, "r"); const buffer = Buffer.alloc(Math.min(4096, fileSize)); fs.readSync(fd, buffer, 0, buffer.length, 0); fs.closeSync(fd); // Check for null bytes which often indicate binary data for (let i = 0; i < buffer.length; i++) { if (buffer[i] === 0) { return true; } } // Try to decode as UTF-8 - if it fails, likely binary try { buffer.toString("utf8"); } catch (e) { return true; } } catch (e) { // If we can't read the file, assume it's not binary return false; } return false; } /** * Initialize the WebAssembly module and Python environment */ async function initialize() { if (isInitialized) return; if (isInitializing) { // Wait for initialization to complete if already in progress while (isInitializing) { await new Promise((resolve) => setTimeout(resolve, 100)); } return; } try { isInitializing = true; // Load Pyodide - use the default CDN path console.log("Loading Pyodide..."); pyodideInstance = await (0, pyodide_1.loadPyodide)(); // Load micropip package console.log("Loading micropip..."); await pyodideInstance.loadPackage(["micropip", "packaging"]); // Load the secret patterns module console.log("Loading secret patterns module..."); const secretPatternsCode = fs.readFileSync(path.join(__dirname, "python", "secret_patterns.py"), "utf-8"); // Create the secret_patterns module in the Python namespace pyodideInstance.globals.set("secret_patterns_code", secretPatternsCode); await pyodideInstance.runPythonAsync(` import sys from types import ModuleType # Create the secret_patterns module secret_patterns = ModuleType('secret_patterns') sys.modules['secret_patterns'] = secret_patterns # Execute the secret_patterns code in the module's namespace exec(secret_patterns_code, secret_patterns.__dict__) `); // Load the advanced analyzer module console.log("Loading advanced analyzer module..."); const advancedAnalyzerCode = fs.readFileSync(path.join(__dirname, "python", "advanced_analyzer.py"), "utf-8"); // Create the advanced_analyzer module in the Python namespace pyodideInstance.globals.set("advanced_analyzer_code", advancedAnalyzerCode); await pyodideInstance.runPythonAsync(` # Create the advanced_analyzer module advanced_analyzer = ModuleType('advanced_analyzer') sys.modules['advanced_analyzer'] = advanced_analyzer # Execute the advanced_analyzer code in the module's namespace exec(advanced_analyzer_code, advanced_analyzer.__dict__) `); // Load the enhanced detector module console.log("Loading enhanced detector module..."); const enhancedDetectorCode = fs.readFileSync(path.join(__dirname, "python", "enhanced_detector.py"), "utf-8"); // Create the enhanced_detector module in the Python namespace pyodideInstance.globals.set("enhanced_detector_code", enhancedDetectorCode); await pyodideInstance.runPythonAsync(` # Create the enhanced_detector module enhanced_detector = ModuleType('enhanced_detector') sys.modules['enhanced_detector'] = enhanced_detector # Execute the enhanced_detector code in the module's namespace exec(enhanced_detector_code, enhanced_detector.__dict__) `); // Load the multi-language detector module console.log("Loading multi-language detector module..."); const multiLanguageDetectorCode = fs.readFileSync(path.join(__dirname, "python", "multi_language_detector.py"), "utf-8"); // Create the multi_language_detector module in the Python namespace pyodideInstance.globals.set("multi_language_detector_code", multiLanguageDetectorCode); await pyodideInstance.runPythonAsync(` # Create the multi_language_detector module multi_language_detector = ModuleType('multi_language_detector') sys.modules['multi_language_detector'] = multi_language_detector # Execute the multi_language_detector code in the module's namespace exec(multi_language_detector_code, multi_language_detector.__dict__) `); // Load the heuristic detector module console.log("Loading heuristic detector module..."); const heuristicDetectorCode = fs.readFileSync(path.join(__dirname, "python", "heuristic_detector.py"), "utf-8"); // Create the heuristic_detector module in the Python namespace pyodideInstance.globals.set("heuristic_detector_code", heuristicDetectorCode); await pyodideInstance.runPythonAsync(` # Create the heuristic_detector module heuristic_detector = ModuleType('heuristic_detector') sys.modules['heuristic_detector'] = heuristic_detector # Execute the heuristic_detector code in the module's namespace exec(heuristic_detector_code, heuristic_detector.__dict__) `); // Load the yelp scanner module console.log("Loading yelp scanner module..."); const yelpScannerCode = fs.readFileSync(path.join(__dirname, "python", "yelp_scanner.py"), "utf-8"); // Create the yelp_scanner module in the Python namespace pyodideInstance.globals.set("yelp_scanner_code", yelpScannerCode); await pyodideInstance.runPythonAsync(` # Create the yelp_scanner module yelp_scanner = ModuleType('yelp_scanner') sys.modules['yelp_scanner'] = yelp_scanner # Execute the yelp_scanner code in the module's namespace exec(yelp_scanner_code, yelp_scanner.__dict__) `); // Load the Python wrapper module console.log("Setting up Python environment..."); const pythonCode = fs.readFileSync(path.join(__dirname, "python", "detect_secrets_wrapper.py"), "utf-8"); // Run the Python code to define the module await pyodideInstance.runPythonAsync(pythonCode); // Initialize the scanner (install dependencies) console.log("Installing Python dependencies (this may take a moment)..."); await pyodideInstance.runPythonAsync("await initialize()"); isInitialized = true; console.log("Initialization complete"); } catch (error) { console.error("Failed to initialize:", error); const errorMessage = error instanceof Error ? error.message : String(error); throw new Error(`Failed to initialize WebAssembly module: ${errorMessage}`); } finally { isInitializing = false; } } /** * Scan a file or string content for secrets * @param content The file content to scan * @param filePath The path of the file (for reporting) * @param options Scan options * @returns Scan results */ async function scanContent(content, filePath, options = {}) { if (!isInitialized) { await initialize(); } try { // Get the max file size from options const maxFileSize = options.maxFileSize || DEFAULT_MAX_FILE_SIZE; // If max file size is set and content is too large, truncate it to prevent memory issues let truncated = false; if (maxFileSize > 0 && content.length > maxFileSize) { content = content.substring(0, maxFileSize); truncated = true; console.warn(`File ${filePath} is too large, scanning only the first ${maxFileSize} bytes`); } // Convert options to a Python-compatible format const checkMissed = options.checkMissed ? true : false; // Set up Python variables pyodideInstance.globals.set("js_file_content", content); pyodideInstance.globals.set("js_file_path", filePath); pyodideInstance.globals.set("js_check_missed", checkMissed); // Call the Python scan_file function await pyodideInstance.runPythonAsync(` import json try: result_json = scan_file(js_file_content, js_file_path, js_check_missed) js_result = result_json except Exception as e: import traceback error_msg = traceback.format_exc() print(f"Python error: {str(e)}\\n{error_msg}") js_result = json.dumps({"error": str(e), "secrets": [], "missed_secrets": []}) `); // Get the result from Python const resultJson = pyodideInstance.globals.get("js_result"); // Check if we got a valid result if (!resultJson) { throw new Error("No result returned from Python scanner"); } // Parse the results const results = JSON.parse(resultJson); // Check if there was an error if (results.error) { throw new Error(`Python error: ${results.error}`); } // Add a note if the file was truncated if (truncated) { results.truncated = true; } return results; } catch (error) { console.error("Error scanning content:", error); const errorMessage = error instanceof Error ? error.message : String(error); throw new Error(`Failed to scan content: ${errorMessage}`); } } /** * Scan a file for secrets * @param filePath The path of the file to scan * @param options Scan options * @returns Scan results */ async function scanFile(filePath, options = {}) { try { // Skip node_modules files unless explicitly included if ((filePath.includes("/node_modules/") || filePath.includes("\\node_modules\\")) && !options.includeNodeModules) { if (options.verbose) { console.log(`Skipping node_modules file: ${filePath}`); } return { secrets: [], missed_secrets: [] }; } // Skip .next build files if (filePath.includes("/.next/") || filePath.includes("\\.next\\")) { if (options.verbose) { console.log(`Skipping Next.js build file: ${filePath}`); } return { secrets: [], missed_secrets: [] }; } // Get file stats const stats = fs.statSync(filePath); // Skip directories if (stats.isDirectory()) { return { secrets: [], missed_secrets: [] }; } // Get the max file size from options const maxFileSize = options.maxFileSize || DEFAULT_MAX_FILE_SIZE; // Check if it's a binary file if (isLikelyBinaryFile(filePath, stats.size, options)) { console.log(`Skipping likely binary file: ${filePath}`); return { secrets: [], missed_secrets: [] }; } // Skip large files if a limit is set and limitFileSize option is true if (maxFileSize > 0 && stats.size > maxFileSize && options.limitFileSize) { console.log(`Skipping large file (${Math.round(stats.size / 1024)}KB): ${filePath}`); return { secrets: [], missed_secrets: [] }; } // Read and scan the file const content = fs.readFileSync(filePath, "utf-8"); return scanContent(content, filePath, options); } catch (error) { console.warn(`Skipping file ${filePath}: ${error instanceof Error ? error.message : String(error)}`); return { secrets: [], missed_secrets: [] }; } } /** * Scan a directory for secrets * @param directory The directory to scan * @param options Scan options * @returns Scan results */ async function scanDirectory(directory = process.cwd(), options = {}) { if (!isInitialized) { await initialize(); } const results = { secrets: [], missed_secrets: [], }; // Default excluded directories if none provided const defaultExcludeDirs = [ "node_modules", ".git", "dist", "build", "coverage", ".next", "__pycache__", ".venv", "venv", "env", ".env", ]; // Default excluded file patterns if none provided const defaultExcludeFiles = [ "*.min.js", "*.min.css", "*.map", "*.lock", "package-lock.json", "yarn.lock", "pnpm-lock.yaml", "*.svg", "*.woff", "*.ttf", "*.eot", "*.jpg", "*.jpeg", "*.png", "*.gif", "*.ico", "*.pdf", "*.zip", "*.tar.gz", ]; // Get all files in the directory const getFiles = (dir, excludeDirs = []) => { let files = []; try { const entries = fs.readdirSync(dir, { withFileTypes: true }); for (const entry of entries) { const fullPath = path.join(dir, entry.name); // Skip excluded directories if (entry.isDirectory()) { const excludePatterns = [ ...defaultExcludeDirs, ...(options.excludeDirs || []), ]; if (excludePatterns.some((pattern) => new RegExp(`^${pattern.replace(/\*/g, ".*")}$`).test(entry.name) || entry.name === pattern)) { continue; } try { files = files.concat(getFiles(fullPath, excludeDirs)); } catch (err) { console.warn(`Skipping directory ${fullPath}: ${err instanceof Error ? err.message : String(err)}`); } } else { // Skip excluded files const excludePatterns = [ ...defaultExcludeFiles, ...(options.excludeFiles || []), ]; if (excludePatterns.some((pattern) => { const regex = new RegExp(`^${pattern.replace(/\./g, "\\.").replace(/\*/g, ".*")}$`); return regex.test(entry.name); })) { continue; } files.push(fullPath); } } } catch (err) { console.warn(`Error reading directory ${dir}: ${err instanceof Error ? err.message : String(err)}`); } return files; }; const files = getFiles(directory, options.excludeDirs); // Track if any files were truncated let anyTruncated = false; // Scan each file with error handling for individual files for (const file of files) { try { const fileResults = await scanFile(file, options); // Check if this file was truncated if (fileResults.truncated) { anyTruncated = true; } results.secrets = results.secrets.concat(fileResults.secrets); results.missed_secrets = results.missed_secrets.concat(fileResults.missed_secrets); } catch (error) { console.warn(`Skipping file ${file}: ${error instanceof Error ? error.message : String(error)}`); } } // Set the truncated flag if any files were truncated if (anyTruncated) { results.truncated = true; } // If we have secrets and the enrichWithGitInfo option is true, add blame info if (results.secrets.length > 0 && (options.enrichWithGitInfo || options.enrichWithGitInfo === undefined)) { try { console.log("Enriching secrets with git blame information..."); results.secrets = await (0, gitleaks_1.enrichSecretsWithBlameInfo)(results.secrets, options); console.log("Git blame enrichment complete"); } catch (error) { console.warn(`Failed to enrich secrets with git blame info: ${error}`); } } // Add detectedBy field to all secrets results.secrets = results.secrets.map((secret) => ({ ...secret, detectedBy: "detect-secrets", })); console.log(results), "<<<<<----secrets results Package"; return results; } /** * Scan a directory using both detect-secrets and Gitleaks * @param directory The directory to scan * @param options Scan options * @returns Combined scan results */ async function scanWithBothScanners(directory = process.cwd(), options = {}) { console.log("Starting both detect-secrets and Gitleaks scanners in parallel..."); // Run both scanners in parallel const [detectSecretsResults, gitleaksResults] = await Promise.all([ // First promise: run detect-secrets (async () => { try { console.log("Running detect-secrets scanner..."); const results = await scanDirectory(directory, options); console.log(`detect-secrets found ${results.secrets.length} secrets`); return results; } catch (error) { console.error(`detect-secrets scan failed: ${error instanceof Error ? error.message : String(error)}`); return { secrets: [], missed_secrets: [] }; } })(), // Second promise: run Gitleaks (async () => { try { console.log("Running Gitleaks scanner..."); const results = await (0, gitleaks_1.runGitleaksScan)(directory, options); console.log(`Gitleaks found ${results.secrets.length} secrets`); return results; } catch (error) { if (error instanceof Error && error.message.includes("Gitleaks is not installed")) { console.error(`Gitleaks is not installed. Please install it following the instructions at https://github.com/zricethezav/gitleaks#installation`); } else { console.error(`Gitleaks scan failed: ${error instanceof Error ? error.message : String(error)}`); } return { secrets: [], missed_secrets: [] }; } })(), ]); // Create a map to store unique secrets const uniqueSecrets = new Map(); // Helper function to normalize file paths to use forward slashes const normalizePath = (filePath) => { return filePath.replace(/\\/g, "/"); }; // Helper function to create a unique key for a secret const createSecretKey = (secret) => { // Normalize the file path and use file and line for uniqueness return `${normalizePath(secret.file)}:${secret.line}`; }; // Helper function to merge secret types while preserving all unique types const mergeSecretTypes = (types1, types2) => { return [...new Set([...types1, ...types2])].sort(); }; // Add detect-secrets results first detectSecretsResults.secrets.forEach((secret) => { const key = createSecretKey(secret); uniqueSecrets.set(key, { ...secret, file: normalizePath(secret.file), // Normalize the path detectedBy: "detect-secrets", }); }); // Add Gitleaks results, merging with existing entries if they exist gitleaksResults.secrets.forEach((secret) => { const key = createSecretKey(secret); if (uniqueSecrets.has(key)) { // Merge with existing secret const existing = uniqueSecrets.get(key); uniqueSecrets.set(key, { ...existing, types: mergeSecretTypes(existing.types, secret.types), detectedBy: "both", // Keep additional metadata from Gitleaks if available author: secret.author || existing.author, email: secret.email || existing.email, date: secret.date || existing.date, commit: secret.commit || existing.commit, message: secret.message || existing.message, hashed_secret: secret.hashed_secret || existing.hashed_secret, }); } else { // Add new secret uniqueSecrets.set(key, { ...secret, file: normalizePath(secret.file), // Normalize the path detectedBy: "gitleaks", }); } }); // Convert map to array and sort by file and line number const mergedSecrets = Array.from(uniqueSecrets.values()).sort((a, b) => { if (a.file !== b.file) return a.file.localeCompare(b.file); return a.line - b.line; }); return { secrets: mergedSecrets, missed_secrets: [ ...detectSecretsResults.missed_secrets, ...gitleaksResults.missed_secrets, ], truncated: detectSecretsResults.truncated || gitleaksResults.truncated, }; } exports.default = { initialize, scanContent, scanFile, scanDirectory, scanWithBothScanners, runGitleaksScan: gitleaks_1.runGitleaksScan, scanRemoteRepository: gitleaks_1.scanRemoteRepository, scanGitHistory: gitleaks_1.scanGitHistory, };