// agentsqripts
// Version:
// Comprehensive static code analysis toolkit for identifying technical debt, security vulnerabilities, performance issues, and code quality problems
// 133 lines (121 loc) • 6.17 kB
// JavaScript
/**
* @file Programming language detection for security analysis
* @description Identifies programming languages used in the project to apply language-specific security rules
* This module enables targeted vulnerability detection by understanding the language composition
* of a project. Different programming languages have distinct vulnerability patterns, attack vectors,
* and security best practices. Accurate language detection allows the security scanner to focus
* on relevant vulnerability types and provide appropriate remediation guidance.
*/
const path = require('path');
// Per-language lookup table used by the functions in this module.
//
// Each entry carries three lists:
//   extensions    - file suffixes (with the leading dot) that identify the language
//   indicators    - project files/directories typical of the language's ecosystem
//                   (NOTE(review): nothing in the visible part of this file reads them)
//   securityRisks - vulnerability categories to prioritise for the language
//
// Entry order matters: extension lookup walks the table top to bottom and
// returns the first language whose extension list contains the suffix.
const LANGUAGE_PATTERNS = {
  // JavaScript, including JSX and ES-module files
  javascript: {
    extensions: [
      '.js',
      '.jsx',
      '.mjs'
    ],
    indicators: [
      'node_modules',
      'package.json'
    ],
    securityRisks: [
      'XSS',
      'Prototype pollution',
      'Command injection'
    ]
  },

  // TypeScript, including TSX
  typescript: {
    extensions: [
      '.ts',
      '.tsx'
    ],
    indicators: [
      'tsconfig.json',
      'package.json'
    ],
    securityRisks: [
      'Type confusion',
      'XSS',
      'Prototype pollution'
    ]
  },

  // Python
  python: {
    extensions: [
      '.py'
    ],
    indicators: [
      'requirements.txt',
      'setup.py',
      '__pycache__'
    ],
    securityRisks: [
      'Code injection',
      'Pickle deserialization',
      'Path traversal'
    ]
  },

  // Java (Maven / Gradle build files as indicators)
  java: {
    extensions: [
      '.java'
    ],
    indicators: [
      'pom.xml',
      'build.gradle'
    ],
    securityRisks: [
      'Deserialization',
      'XXE',
      'SQL injection'
    ]
  }
};
/**
 * Detect which supported programming languages appear in a list of file paths.
 *
 * Each path's extension is mapped to a language via getLanguageFromExtension;
 * paths with unrecognised extensions are ignored. For every language found, the
 * result reports how many files matched, the language's known security risks,
 * and a confidence level derived from that language's share of ALL entries in
 * `files` (recognised or not).
 *
 * @param {Array<string>} files - File paths to analyze. A missing or non-array
 *   value is treated as an empty list; non-string entries are skipped (they
 *   still count toward the total used for the confidence calculation).
 * @returns {Array<Object>} One `{ name, fileCount, securityRisks, confidence }`
 *   object per detected language, sorted by `fileCount` descending. Empty when
 *   nothing is recognised.
 *
 * Rationale: sorting by file count lets callers spend limited analysis time on
 * the most prevalent languages first.
 */
function detectLanguages(files) {
  // No usable file list means no languages; avoids a TypeError on null/undefined.
  if (!Array.isArray(files)) {
    return [];
  }

  // Tally files per language. A Map keeps first-seen order, which is the
  // tie-break order after the (stable) sort below.
  const countsByLanguage = new Map();
  for (const file of files) {
    // path.extname throws on non-string arguments, so skip anything else.
    if (typeof file !== 'string') {
      continue;
    }
    const language = getLanguageFromExtension(path.extname(file));
    if (language) {
      countsByLanguage.set(language, (countsByLanguage.get(language) || 0) + 1);
    }
  }

  const detectedLanguages = [];
  for (const [language, count] of countsByLanguage) {
    const config = LANGUAGE_PATTERNS[language];
    if (!config) {
      continue;
    }
    detectedLanguages.push({
      name: language,
      fileCount: count,
      // Copy so callers cannot mutate the shared module-level table through the result.
      securityRisks: [...config.securityRisks],
      confidence: calculateConfidence(count, files.length)
    });
  }

  // Most prevalent language first.
  return detectedLanguages.sort((a, b) => b.fileCount - a.fileCount);
}
/**
 * Map a file extension to the supported language that owns it.
 *
 * Matching is case-insensitive, so '.JS' and '.Py' resolve the same way as
 * '.js' and '.py'. Languages are checked in the order they appear in
 * LANGUAGE_PATTERNS and the first match wins.
 *
 * @param {string} extension - File extension including the dot (e.g., '.js')
 * @returns {string|null} Language name if recognized; null for unknown
 *   extensions, the empty string, or non-string input
 *
 * Rationale: centralizes the extension-to-language mapping so every caller
 * resolves extensions the same way. Extension-based detection cannot identify
 * extensionless or polyglot files.
 */
function getLanguageFromExtension(extension) {
  // Anything that is not a string cannot be an extension.
  if (typeof extension !== 'string') {
    return null;
  }

  // The table stores lower-case extensions; normalise before comparing so
  // upper- or mixed-case file names are not silently ignored.
  const normalized = extension.toLowerCase();

  for (const [language, config] of Object.entries(LANGUAGE_PATTERNS)) {
    if (config.extensions.includes(normalized)) {
      return language;
    }
  }
  return null;
}
/**
 * Classify how strongly a language is represented among the analyzed files.
 *
 * The language's share of the total is compared against two thresholds:
 * more than 50% is 'high', more than 20% is 'medium', anything else is 'low'.
 * Both thresholds are exclusive (exactly 50% is 'medium', exactly 20% is 'low').
 *
 * @param {number} languageFileCount - Number of files detected for this language
 * @param {number} totalFiles - Total number of files analyzed
 * @returns {string} Confidence level: 'high', 'medium', or 'low'
 *
 * Rationale: a language that makes up only a small fraction of the files may be
 * leftover or utility code, so its detection is reported with lower confidence.
 */
function calculateConfidence(languageFileCount, totalFiles) {
  // Without a positive total there is no meaningful share. Dividing by zero
  // would yield Infinity (classified as 'high') or NaN, so report the weakest
  // level instead. The negated comparison also catches NaN and undefined.
  if (!(totalFiles > 0)) {
    return 'low';
  }

  const percentage = (languageFileCount / totalFiles) * 100;
  if (percentage > 50) {
    return 'high';
  }
  if (percentage > 20) {
    return 'medium';
  }
  return 'low';
}
/**
 * Get the vulnerability categories associated with a specific language.
 *
 * @param {string} language - Programming language name, as used for the keys
 *   of LANGUAGE_PATTERNS (e.g., 'python')
 * @returns {Array<string>} A new array of vulnerability types for the language;
 *   an empty array when the language is not in the table
 *
 * Rationale: lets scanners look up which vulnerability types to prioritise for
 * a detected language without reaching into the lookup table directly.
 */
function getLanguageSecurityPatterns(language) {
  // Require an own property: a plain `LANGUAGE_PATTERNS[language]` lookup also
  // resolves inherited names such as 'constructor', 'toString' or '__proto__'
  // to truthy values that have no `securityRisks`, which would leak `undefined`
  // to callers expecting an array.
  if (!Object.prototype.hasOwnProperty.call(LANGUAGE_PATTERNS, language)) {
    return [];
  }

  const { securityRisks } = LANGUAGE_PATTERNS[language];

  // Return a copy so callers cannot mutate the shared module-level table.
  return Array.isArray(securityRisks) ? [...securityRisks] : [];
}
// Public API of this module (CommonJS). Only these three functions are exported;
// calculateConfidence and the LANGUAGE_PATTERNS table are not part of the export object.
module.exports = {
detectLanguages,
getLanguageFromExtension,
getLanguageSecurityPatterns
};