agentsqripts

Comprehensive static code analysis toolkit for identifying technical debt, security vulnerabilities, performance issues, and code quality problems

119 lines (99 loc) 4.28 kB
/**
 * @file Find semantic duplicates using optimized algorithm
 * @description Single responsibility: Find similar code blocks across project using optimized O(n log n) algorithm
 */

const { calculateSemanticSimilarity, identifyDuplicationPattern } = require('../../ast/semanticSimilarityCalculator');
const { classifyDuplicationType } = require('../../ast/duplicateTypeClassifier');
const { getContextAwareRecommendations } = require('../../ast/contextAnalyzer');
const calculateAverageSimilarity = require('../metrics/calculateAverageSimilarity');
const calculateDuplicationImpact = require('../metrics/calculateDuplicationImpact');

/**
 * Find semantic duplicates using optimized O(n log n) algorithm
 */
function findSemanticDuplicatesOptimized(allBlocks, fileContexts) {
  const duplicateGroups = [];
  const processed = new Set();

  // First pass: Group exact duplicates by hash (O(n))
  const hashGroups = new Map();
  allBlocks.forEach((block, index) => {
    if (!hashGroups.has(block.hash)) {
      hashGroups.set(block.hash, []);
    }
    hashGroups.get(block.hash).push({ block, index });
  });

  // Process exact duplicate groups
  for (const group of hashGroups.values()) {
    if (group.length > 1) {
      const blocks = group.map(g => g.block);
      const pattern = identifyDuplicationPattern(blocks);
      const classification = classifyDuplicationType(blocks, fileContexts);

      // Even exact duplicates might be acceptable (e.g., test boilerplate)
      if (classification.type !== 'ACCEPTABLE_SIMILARITY') {
        duplicateGroups.push({
          type: 'exact_duplicate',
          pattern,
          blocks,
          similarity: 1.0,
          impact: calculateDuplicationImpact(blocks),
          classification,
          contextRecommendations: getContextAwareRecommendations({ blocks }, fileContexts)
        });
      }

      // Mark as processed
      group.forEach(g => processed.add(g.index));
    }
  }

  // Second pass: Find semantic duplicates among remaining blocks
  const remainingBlocks = allBlocks
    .map((block, index) => ({ block, index }))
    .filter(item => !processed.has(item.index));

  // Group by type and similar complexity for efficient comparison
  const typeGroups = new Map();
  remainingBlocks.forEach(item => {
    const key = `${item.block.type}-${Math.floor(item.block.complexity / 5)}`;
    if (!typeGroups.has(key)) {
      typeGroups.set(key, []);
    }
    typeGroups.get(key).push(item);
  });

  // Compare within type groups (much smaller n)
  for (const group of typeGroups.values()) {
    if (group.length < 2) continue;

    // Use similarity threshold to create groups
    for (let i = 0; i < group.length; i++) {
      if (processed.has(group[i].index)) continue;

      const currentGroup = [group[i].block];
      processed.add(group[i].index);

      for (let j = i + 1; j < group.length; j++) {
        if (processed.has(group[j].index)) continue;

        const similarity = calculateSemanticSimilarity(group[i].block, group[j].block);
        if (similarity >= 0.7) { // Configurable threshold
          currentGroup.push(group[j].block);
          processed.add(group[j].index);
        }
      }

      if (currentGroup.length > 1) {
        const pattern = identifyDuplicationPattern(currentGroup);
        const classification = classifyDuplicationType(currentGroup, fileContexts);

        // Only include if it's a real duplicate, not a template pattern
        if (classification.type !== 'TEMPLATE_PATTERN' && classification.type !== 'ACCEPTABLE_SIMILARITY') {
          duplicateGroups.push({
            type: 'semantic_duplicate',
            pattern,
            blocks: currentGroup,
            similarity: calculateAverageSimilarity(currentGroup),
            impact: calculateDuplicationImpact(currentGroup),
            classification,
            contextRecommendations: getContextAwareRecommendations({ blocks: currentGroup }, fileContexts)
          });
        }
      }
    }
  }

  // Sort by impact
  duplicateGroups.sort((a, b) => b.impact.score - a.impact.score);

  return duplicateGroups;
}

module.exports = findSemanticDuplicatesOptimized;
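
For orientation, here is a minimal, hypothetical usage sketch. The require path, the block shape ({ hash, type, complexity, file }), and the fileContexts value are all assumptions inferred from the fields the function reads and passes through; none of them are documented API of agentsqripts, which presumably builds these blocks from parsed ASTs upstream.

// Hypothetical usage sketch. The require path and all input shapes below are
// assumptions based on what findSemanticDuplicatesOptimized reads, not
// documented agentsqripts API.
const findSemanticDuplicatesOptimized = require('./findSemanticDuplicatesOptimized');

// Assumed block shape: hash for exact matching, type/complexity for bucketing.
const allBlocks = [
  { hash: 'f3a9', type: 'function', complexity: 4, file: 'src/a.js' },
  { hash: 'f3a9', type: 'function', complexity: 4, file: 'src/b.js' }, // exact duplicate by hash
  { hash: '0c71', type: 'function', complexity: 6, file: 'src/c.js' }
];

// fileContexts is only passed through to the classifier/recommendation
// helpers; a per-file context object is one plausible shape.
const fileContexts = { 'src/a.js': {}, 'src/b.js': {}, 'src/c.js': {} };

const groups = findSemanticDuplicatesOptimized(allBlocks, fileContexts);
groups.forEach(g => console.log(g.type, g.similarity, g.blocks.length));

Two behaviors of the implementation are worth noting. Blocks are only ever compared within the same `${type}-${Math.floor(complexity / 5)}` bucket, so two blocks with complexities 4 and 6 (buckets 0 and 1) are never compared even if they are near-identical; that bucketing is what keeps the second pass well below a naive O(n²) pairwise scan. And the 0.7 similarity threshold is hard-coded, the inline "Configurable threshold" comment notwithstanding.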