detect-secrets-js
Version:
A JavaScript implementation of Yelp's detect-secrets tool - no Python required
351 lines (297 loc) • 10.9 kB
text/typescript
import { loadPyodide } from 'pyodide';
import * as fs from 'fs';
import * as path from 'path';
import { ScanOptions, ScanResults } from './types';
// Minimal ambient declarations for the Node.js globals this module relies on:
// `__dirname` locates the bundled Python wrapper script and `process.cwd()`
// supplies the default directory for scanDirectory().
declare const __dirname: string;
declare const process: {
cwd(): string;
};
// Module-wide Pyodide state shared by every exported function.
// `pyodideInstance` stays null until initialize() assigns the loaded runtime.
let pyodideInstance: any = null;
// True once initialize() has completed successfully.
let isInitialized = false;
// True while an initialize() call is in progress.
let isInitializing = false;
// Constants
// Fallback used when `options.maxFileSize` is not provided; size checks only
// apply when the effective value is greater than zero.
const DEFAULT_MAX_FILE_SIZE = 0; // No default file size limit (0 means no limit)
// File extensions that are treated as binary without inspecting file contents.
const BINARY_FILE_EXTENSIONS = [
  '.pack', '.gz', '.zip', '.jar', '.war', '.ear', '.class', '.so', '.dll', '.exe',
  '.obj', '.o', '.a', '.lib', '.pyc', '.pyo', '.jpg', '.jpeg', '.png', '.gif',
  '.bmp', '.ico', '.tif', '.tiff', '.mp3', '.mp4', '.avi', '.mov', '.wmv', '.flv',
  '.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx'
];

// Path fragments (forward-slash form) whose contents are never worth scanning.
const BINARY_LIKE_DIRECTORIES = ['/.git/', '/node_modules/', '/__pycache__/', '/.next/cache/'];

// Maximum number of leading bytes sampled when sniffing for binary content.
const BINARY_SNIFF_BYTES = 4096;

/**
 * Heuristically decide whether a file should be skipped as binary.
 *
 * Checks, in order: the file extension, whether the path lies inside a
 * well-known generated/vendored directory, and finally whether the first
 * bytes of the file contain a NUL byte.
 *
 * @param filePath Path of the file to inspect.
 * @param fileSize Size of the file in bytes (used to bound the sample size).
 * @returns `true` when the file looks binary; `false` otherwise, including
 *          when the file cannot be opened or read.
 */
function isLikelyBinaryFile(filePath: string, fileSize: number): boolean {
  const ext = path.extname(filePath).toLowerCase();
  if (BINARY_FILE_EXTENSIONS.includes(ext)) {
    return true;
  }

  // Normalise platform separators so the fragments also match Windows paths.
  const normalizedPath = filePath.split(path.sep).join('/');
  if (BINARY_LIKE_DIRECTORIES.some(fragment => normalizedPath.includes(fragment))) {
    return true;
  }

  let fd: number | undefined;
  try {
    fd = fs.openSync(filePath, 'r');
    const buffer = Buffer.alloc(Math.min(BINARY_SNIFF_BYTES, fileSize));
    const bytesRead = fs.readSync(fd, buffer, 0, buffer.length, 0);
    // Only inspect bytes that were actually read: the buffer is zero-filled,
    // so a short read would otherwise look like NUL bytes in the file.
    // (No UTF-8 decode check here: Buffer#toString never throws on malformed
    // input, so it cannot be used to detect binary data.)
    return buffer.subarray(0, bytesRead).includes(0);
  } catch {
    // If the file cannot be read, assume it is not binary and let the caller
    // surface the real I/O error when it reads the file itself.
    return false;
  } finally {
    if (fd !== undefined) {
      try {
        fs.closeSync(fd);
      } catch {
        // Nothing useful can be done if closing fails.
      }
    }
  }
}
// Promise for the initialization currently in flight, or null when idle.
// Sharing it lets concurrent callers observe the same success or failure.
let initializationPromise: Promise<void> | null = null;

/**
 * Initialize the WebAssembly module and Python environment.
 *
 * Safe to call repeatedly: it returns immediately once initialization has
 * succeeded, and concurrent calls share a single in-flight attempt. If that
 * attempt fails, every waiting caller receives the same error, and a later
 * call starts a fresh attempt.
 *
 * @throws Error when Pyodide or the Python wrapper cannot be loaded.
 */
export async function initialize(): Promise<void> {
  if (isInitialized) return;

  if (initializationPromise === null) {
    initializationPromise = runInitialization().finally(() => {
      // Clear the slot so a failed attempt can be retried later.
      initializationPromise = null;
    });
  }
  await initializationPromise;
}

/** Perform one initialization attempt and record its outcome in module state. */
async function runInitialization(): Promise<void> {
  isInitializing = true;
  try {
    // Load Pyodide - use the default CDN path
    console.log('Loading Pyodide...');
    pyodideInstance = await loadPyodide();

    // Load micropip package
    console.log('Loading micropip...');
    await pyodideInstance.loadPackage(['micropip', 'packaging']);

    // Load the Python wrapper module
    console.log('Setting up Python environment...');
    const pythonCode = fs.readFileSync(
      path.join(__dirname, 'python', 'detect_secrets_wrapper.py'),
      'utf-8'
    );

    // Run the Python code to define the module
    await pyodideInstance.runPythonAsync(pythonCode);

    // Initialize the scanner (install dependencies)
    console.log('Installing Python dependencies (this may take a moment)...');
    await pyodideInstance.runPythonAsync('await initialize()');

    isInitialized = true;
    console.log('Initialization complete');
  } catch (error: unknown) {
    console.error('Failed to initialize:', error);
    const errorMessage = error instanceof Error ? error.message : String(error);
    throw new Error(`Failed to initialize WebAssembly module: ${errorMessage}`);
  } finally {
    isInitializing = false;
  }
}
// Python snippet executed for every scan. It reads its inputs from the
// `js_*` globals set by scanContent() and leaves a JSON string in `js_result`.
// Python is indentation-sensitive, so the bodies of `try`/`except` must stay
// indented relative to column 0 of this literal.
const SCAN_FILE_SNIPPET = `
import json
try:
    result_json = scan_file(js_file_content, js_file_path, js_check_missed)
    js_result = result_json
except Exception as e:
    import traceback
    error_msg = traceback.format_exc()
    print(f"Python error: {str(e)}\\n{error_msg}")
    js_result = json.dumps({"error": str(e), "secrets": [], "missed_secrets": []})
`;

/**
 * Scan a file or string content for secrets.
 *
 * Initializes the Python environment on first use. When `options.maxFileSize`
 * is greater than zero and the content is longer than that limit (measured in
 * string length), only the leading part is scanned and the result carries
 * `truncated: true`.
 *
 * NOTE(review): inputs and the result travel through shared Python globals,
 * so overlapping calls could observe each other's values — callers should
 * await one scan before starting the next.
 *
 * @param content The file content to scan
 * @param filePath The path of the file (for reporting)
 * @param options Scan options
 * @returns Scan results
 * @throws Error when the Python scanner reports an error or returns nothing.
 */
export async function scanContent(
  content: string,
  filePath: string,
  options: Partial<ScanOptions> = {}
): Promise<ScanResults> {
  if (!isInitialized) {
    await initialize();
  }

  try {
    const maxFileSize = options.maxFileSize ?? DEFAULT_MAX_FILE_SIZE;

    // Truncate oversized content to prevent memory issues, without mutating
    // the caller-visible parameter.
    const truncated = maxFileSize > 0 && content.length > maxFileSize;
    const scannedContent = truncated ? content.substring(0, maxFileSize) : content;
    if (truncated) {
      console.warn(`File ${filePath} is too large, scanning only the first ${maxFileSize} bytes`);
    }

    // Hand the inputs to Python through globals read by SCAN_FILE_SNIPPET.
    pyodideInstance.globals.set('js_file_content', scannedContent);
    pyodideInstance.globals.set('js_file_path', filePath);
    pyodideInstance.globals.set('js_check_missed', Boolean(options.checkMissed));

    await pyodideInstance.runPythonAsync(SCAN_FILE_SNIPPET);

    const resultJson = pyodideInstance.globals.get('js_result');
    if (!resultJson) {
      throw new Error('No result returned from Python scanner');
    }

    const results = JSON.parse(resultJson);

    // The snippet reports Python-side failures via an `error` field.
    if (results.error) {
      throw new Error(`Python error: ${results.error}`);
    }

    if (truncated) {
      results.truncated = true;
    }
    return results;
  } catch (error: unknown) {
    console.error('Error scanning content:', error);
    const errorMessage = error instanceof Error ? error.message : String(error);
    throw new Error(`Failed to scan content: ${errorMessage}`);
  }
}
/**
 * Scan a file for secrets.
 *
 * Anything that is not a regular file (directories, FIFOs, sockets, devices),
 * files that look binary, and — when `options.limitFileSize` is set — files
 * larger than `options.maxFileSize` are skipped and yield an empty result.
 * File-system errors are logged as warnings and also yield an empty result.
 * Errors raised by the scanner itself are not swallowed: the returned promise
 * rejects with them.
 *
 * @param filePath The path of the file to scan
 * @param options Scan options
 * @returns Scan results
 */
export async function scanFile(
  filePath: string,
  options: Partial<ScanOptions> = {}
): Promise<ScanResults> {
  let content: string;
  try {
    const stats = fs.statSync(filePath);

    // Only regular files can be read safely; opening a FIFO or device node
    // could block indefinitely.
    if (!stats.isFile()) {
      return { secrets: [], missed_secrets: [] };
    }

    if (isLikelyBinaryFile(filePath, stats.size)) {
      console.log(`Skipping likely binary file: ${filePath}`);
      return { secrets: [], missed_secrets: [] };
    }

    // Skip large files if a limit is set and limitFileSize option is true
    const maxFileSize = options.maxFileSize ?? DEFAULT_MAX_FILE_SIZE;
    if (maxFileSize > 0 && stats.size > maxFileSize && options.limitFileSize) {
      console.log(`Skipping large file (${Math.round(stats.size / 1024)}KB): ${filePath}`);
      return { secrets: [], missed_secrets: [] };
    }

    content = fs.readFileSync(filePath, 'utf-8');
  } catch (error: unknown) {
    console.warn(`Skipping file ${filePath}: ${error instanceof Error ? error.message : String(error)}`);
    return { secrets: [], missed_secrets: [] };
  }

  // Deliberately outside the try block: scanner failures propagate to the caller.
  return scanContent(content, filePath, options);
}
// Directory names that are always skipped; `options.excludeDirs` adds to this list.
const DEFAULT_EXCLUDED_DIRS = [
  'node_modules', '.git', 'dist', 'build', 'coverage', '.next',
  '__pycache__', '.venv', 'venv', 'env', '.env'
];

// File-name globs that are always skipped; `options.excludeFiles` adds to this list.
const DEFAULT_EXCLUDED_FILES = [
  '*.min.js', '*.min.css', '*.map', '*.lock', '*.svg', '*.woff', '*.ttf', '*.eot',
  '*.jpg', '*.jpeg', '*.png', '*.gif', '*.ico', '*.pdf', '*.zip', '*.tar.gz'
];

/**
 * Compile glob-style name patterns into a single predicate.
 * `*` matches any run of characters; every other character is matched
 * literally, so names such as `.git` or `c++` cannot be misread as regex syntax.
 */
function buildNameMatcher(patterns: readonly string[]): (name: string) => boolean {
  const regexes = patterns.map(pattern => {
    const escaped = pattern.replace(/[.+?^${}()|[\]\\]/g, '\\$&');
    return new RegExp(`^${escaped.replace(/\*/g, '.*')}$`);
  });
  return name => regexes.some(regex => regex.test(name));
}

/**
 * Recursively list the files under `root`, depth-first in directory order,
 * skipping excluded directories and files. Unreadable directories are logged
 * and skipped.
 */
function collectFiles(
  root: string,
  isExcludedDir: (name: string) => boolean,
  isExcludedFile: (name: string) => boolean
): string[] {
  const found: string[] = [];
  const walk = (dir: string): void => {
    try {
      for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
        const fullPath = path.join(dir, entry.name);
        if (entry.isDirectory()) {
          if (!isExcludedDir(entry.name)) {
            walk(fullPath);
          }
        } else if (!isExcludedFile(entry.name)) {
          found.push(fullPath);
        }
      }
    } catch (err: unknown) {
      console.warn(`Error reading directory ${dir}: ${err instanceof Error ? err.message : String(err)}`);
    }
  };
  walk(root);
  return found;
}

/**
 * Scan a directory for secrets.
 *
 * Walks the directory tree, skipping the built-in excluded directories and
 * file patterns plus any supplied via `options.excludeDirs` and
 * `options.excludeFiles`, and scans each remaining file in turn. A file whose
 * scan fails is logged and skipped. The result has `truncated: true` if any
 * file was truncated.
 *
 * @param directory The directory to scan
 * @param options Scan options
 * @returns Scan results
 */
export async function scanDirectory(
  directory: string = process.cwd(),
  options: Partial<ScanOptions> = {}
): Promise<ScanResults> {
  if (!isInitialized) {
    await initialize();
  }

  const results: ScanResults = {
    secrets: [],
    missed_secrets: []
  };

  // Compile the exclusion patterns once instead of once per directory entry.
  const isExcludedDir = buildNameMatcher([...DEFAULT_EXCLUDED_DIRS, ...(options.excludeDirs || [])]);
  const isExcludedFile = buildNameMatcher([...DEFAULT_EXCLUDED_FILES, ...(options.excludeFiles || [])]);
  const files = collectFiles(directory, isExcludedDir, isExcludedFile);

  let anyTruncated = false;
  for (const file of files) {
    // Scan sequentially; one failing file must not abort the whole run.
    try {
      const fileResults = await scanFile(file, options);
      if (fileResults.truncated) {
        anyTruncated = true;
      }
      results.secrets = results.secrets.concat(fileResults.secrets);
      results.missed_secrets = results.missed_secrets.concat(fileResults.missed_secrets);
    } catch (error: unknown) {
      console.warn(`Skipping file ${file}: ${error instanceof Error ? error.message : String(error)}`);
    }
  }

  if (anyTruncated) {
    results.truncated = true;
  }
  return results;
}
// Default export bundling the public API, for consumers that prefer
// `import detectSecrets from ...` over the individual named exports.
const detectSecretsApi = { initialize, scanContent, scanFile, scanDirectory };
export default detectSecretsApi;