UNPKG

agentsqripts

Version:

Comprehensive static code analysis toolkit for identifying technical debt, security vulnerabilities, performance issues, and code quality problems

133 lines (121 loc) 6.17 kB
/** * @file Programming language detection for security analysis * @description Identifies programming languages used in the project to apply language-specific security rules * This module enables targeted vulnerability detection by understanding the language composition * of a project. Different programming languages have distinct vulnerability patterns, attack vectors, * and security best practices. Accurate language detection allows the security scanner to focus * on relevant vulnerability types and provide appropriate remediation guidance. */ const path = require('path'); // Language-specific security characteristics and vulnerability patterns // Rationale: Each programming language has unique attack surfaces and common vulnerability types // This mapping enables focused security analysis and helps prioritize scanning efforts const LANGUAGE_PATTERNS = { javascript: { extensions: ['.js', '.jsx', '.mjs'], // Common JavaScript file extensions including ES modules indicators: ['node_modules', 'package.json'], // Ecosystem indicators for additional confidence securityRisks: ['XSS', 'Prototype pollution', 'Command injection'] // Primary JS vulnerability categories }, typescript: { extensions: ['.ts', '.tsx'], // TypeScript source files and React TypeScript files indicators: ['tsconfig.json', 'package.json'], // TypeScript project configuration files securityRisks: ['Type confusion', 'XSS', 'Prototype pollution'] // TS-specific plus inherited JS risks }, python: { extensions: ['.py'], // Python source files indicators: ['requirements.txt', 'setup.py', '__pycache__'], // Python project and runtime indicators securityRisks: ['Code injection', 'Pickle deserialization', 'Path traversal'] // Common Python vulnerabilities }, java: { extensions: ['.java'], // Java source files indicators: ['pom.xml', 'build.gradle'], // Maven and Gradle build system indicators securityRisks: ['Deserialization', 'XXE', 'SQL injection'] // Enterprise Java vulnerability patterns } }; /** * Detect programming languages present in the project with statistical confidence * @param {Array<string>} files - Array of file paths to analyze for language detection * @returns {Array<Object>} Detected languages sorted by prevalence with security risk information * * Rationale: Uses file extension analysis combined with statistical confidence scoring to * identify the primary languages in a project. Sorting by file count helps prioritize * security analysis efforts on the most prevalent languages first, maximizing impact * of limited analysis time. */ function detectLanguages(files) { const languageStats = {}; const detectedLanguages = []; // Count files by language for statistical analysis files.forEach(file => { const ext = path.extname(file); const language = getLanguageFromExtension(ext); if (language) { languageStats[language] = (languageStats[language] || 0) + 1; } }); // Convert statistics to structured language detection results Object.entries(languageStats).forEach(([language, count]) => { const config = LANGUAGE_PATTERNS[language]; if (config) { detectedLanguages.push({ name: language, fileCount: count, // Number of files in this language securityRisks: config.securityRisks, // Known vulnerability types for this language confidence: calculateConfidence(count, files.length) // Statistical confidence in detection }); } }); // Return languages sorted by prevalence for prioritized analysis return detectedLanguages.sort((a, b) => b.fileCount - a.fileCount); } /** * Map file extension to programming language * @param {string} extension - File extension including the dot (e.g., '.js') * @returns {string|null} Language name if recognized, null otherwise * * Rationale: Centralizes extension-to-language mapping for consistency across the application. * File extensions are the most reliable way to identify programming languages in most * codebases, though this approach may miss polyglot files or unusual naming conventions. */ function getLanguageFromExtension(extension) { for (const [language, config] of Object.entries(LANGUAGE_PATTERNS)) { if (config.extensions.includes(extension)) { return language; // Return first match - extensions should be unique across languages } } return null; // Extension not recognized as a supported language } /** * Calculate confidence level in language detection based on file prevalence * @param {number} languageFileCount - Number of files detected for this language * @param {number} totalFiles - Total number of files analyzed * @returns {string} Confidence level: 'high', 'medium', or 'low' * * Rationale: Provides confidence scoring to help analysts understand the reliability * of language detection. Higher file counts indicate the language is actually used * rather than just having a few leftover files from previous development phases. */ function calculateConfidence(languageFileCount, totalFiles) { const percentage = (languageFileCount / totalFiles) * 100; if (percentage > 50) return 'high'; // Language is majority of codebase if (percentage > 20) return 'medium'; // Significant presence but not dominant return 'low'; // Minor presence, possibly legacy or utility files } /** * Get security vulnerability patterns associated with a specific language * @param {string} language - Programming language name * @returns {Array<string>} Array of vulnerability types common to this language * * Rationale: Enables language-specific vulnerability scanning by providing the security * patterns that should be prioritized for each language. This helps focus limited * analysis resources on the most relevant vulnerability types for the detected languages. */ function getLanguageSecurityPatterns(language) { const config = LANGUAGE_PATTERNS[language]; return config ? config.securityRisks : []; // Return empty array for unknown languages } module.exports = { detectLanguages, getLanguageFromExtension, getLanguageSecurityPatterns };