@paulohenriquevn/m2js
Version:
Transform TypeScript/JavaScript code into LLM-friendly Markdown summaries + Smart Dead Code Detection + Graph-Deep Diff Analysis. Extract exported functions, classes, and JSDoc comments for better AI context with 60%+ token reduction. Intelligent dead cod
463 lines • 17.7 kB
JavaScript
;
/**
* Duplicate Code Analyzer - Integration with jscpd
* Provides LLM-friendly duplicate code detection and reporting
*/
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
var desc = Object.getOwnPropertyDescriptor(m, k);
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
o["default"] = v;
});
var __importStar = (this && this.__importStar) || (function () {
var ownKeys = function(o) {
ownKeys = Object.getOwnPropertyNames || function (o) {
var ar = [];
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
return ar;
};
return ownKeys(o);
};
return function (mod) {
if (mod && mod.__esModule) return mod;
var result = {};
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
__setModuleDefault(result, mod);
return result;
};
})();
Object.defineProperty(exports, "__esModule", { value: true });
exports.analyzeDuplicateCode = analyzeDuplicateCode;
const child_process_1 = require("child_process");
const fs = __importStar(require("fs"));
const path = __importStar(require("path"));
const os_1 = require("os");
/**
* Analyze duplicate code in the provided files using jscpd
*/
async function analyzeDuplicateCode(files, options = {}) {
if (!files || files.length === 0) {
throw new Error('No files provided for duplicate code analysis');
}
const startTime = Date.now();
// Determine project path
const projectPath = path.dirname(path.resolve(files[0]));
// Set default options
const analysisOptions = {
minLines: options.minLines ?? 5,
minTokens: options.minTokens ?? 50,
format: options.format ?? 'table',
includeContext: options.includeContext ?? true,
includeSuggestions: options.includeSuggestions ?? true,
maxFiles: options.maxFiles ?? 1000,
ignore: options.ignore ?? [
'node_modules',
'dist',
'build',
'*.test.ts',
'*.spec.ts',
],
};
try {
// Run jscpd analysis
const jscpdResult = await runJscpdAnalysis(files, analysisOptions, projectPath);
// Process jscpd results into our format
const duplicates = processJscpdResults(jscpdResult, projectPath);
// Generate refactoring suggestions
const suggestions = generateRefactoringSuggestions(duplicates);
// Calculate metrics
const metrics = calculateMetrics(files, duplicates, Date.now() - startTime);
// Analyze files that were processed
const analyzedFiles = files.filter(file => fs.existsSync(file));
const skippedFiles = files.filter(file => !fs.existsSync(file));
return {
projectPath,
options: analysisOptions,
duplicates,
suggestions,
metrics,
timestamp: new Date(),
analyzedFiles,
skippedFiles,
};
}
catch (error) {
throw new Error(`Duplicate code analysis failed: ${error.message}`);
}
}
/**
* Run jscpd analysis and return parsed results
*/
async function runJscpdAnalysis(files, options, projectPath) {
// Create temporary output file for jscpd JSON results
const tempDir = (0, os_1.tmpdir)();
const outputFile = path.join(tempDir, `jscpd-${Date.now()}.json`);
// Create jscpd config
const jscpdConfig = {
minLines: options.minLines,
minTokens: options.minTokens,
format: ['typescript', 'javascript'],
ignore: options.ignore,
reporters: ['json'],
output: path.dirname(outputFile),
};
// Create temporary config file
const configFile = path.join(tempDir, `jscpd-config-${Date.now()}.json`);
fs.writeFileSync(configFile, JSON.stringify(jscpdConfig, null, 2));
try {
// Determine if we're analyzing files or directories
const targets = files.length === 1 && fs.statSync(files[0]).isDirectory()
? [files[0]]
: [path.dirname(files[0])];
// Build jscpd command
const jscpdCmd = [
'npx jscpd',
...targets.map(t => `"${t}"`),
`--config "${configFile}"`,
'-r json',
`--output "${path.dirname(outputFile)}"`,
'--silent',
].join(' ');
// Execute jscpd
(0, child_process_1.execSync)(jscpdCmd, {
cwd: projectPath,
stdio: 'pipe',
timeout: 120000, // 2 minute timeout
});
// Read and parse results - jscpd creates jscpd-report.json in the output directory
const reportFile = path.join(path.dirname(outputFile), 'jscpd-report.json');
if (!fs.existsSync(reportFile)) {
// No duplicates found
return {
duplicates: [],
statistics: {
total: { files: files.length, lines: 0, tokens: 0 },
clones: { lines: 0, tokens: 0, duplicatedLines: 0, percentage: 0 },
formats: {},
},
};
}
const resultContent = fs.readFileSync(reportFile, 'utf-8');
const result = JSON.parse(resultContent);
// Cleanup temporary files
fs.unlinkSync(reportFile);
fs.unlinkSync(configFile);
return result;
}
catch (error) {
// Cleanup on error
try {
const reportFile = path.join(path.dirname(outputFile), 'jscpd-report.json');
if (fs.existsSync(reportFile))
fs.unlinkSync(reportFile);
if (fs.existsSync(configFile))
fs.unlinkSync(configFile);
}
catch {
// Ignore cleanup errors
}
throw new Error(`jscpd execution failed: ${error.message}`);
}
}
/**
* Process jscpd results into our duplicate block format
*/
function processJscpdResults(jscpdResult, projectPath) {
const duplicates = [];
const processedFragments = new Set();
jscpdResult.duplicates.forEach((duplicate, index) => {
// Use the actual fragment content as key
const fragmentKey = duplicate.fragment || `${duplicate.lines}-${index}`;
// Skip if we've already processed this fragment pattern
if (processedFragments.has(fragmentKey)) {
return;
}
processedFragments.add(fragmentKey);
// Get code content from the jscpd fragment or extract from file
const code = duplicate.fragment ||
extractCodeFromFile(duplicate.firstFile.name, duplicate.firstFile.start, duplicate.firstFile.end);
// Calculate similarity (jscpd finds exact matches, so 100%)
const similarity = 100;
// Determine duplicate type
const type = similarity === 100 ? 'exact' : 'similar';
// Get lines and tokens from jscpd data or calculate
const lines = duplicate.lines ||
duplicate.firstFile.end - duplicate.firstFile.start + 1;
const tokens = duplicate.tokens || 50; // fallback
// Calculate complexity based on lines and tokens
const complexity = calculateComplexity(lines, tokens);
const locations = [
{
file: path.relative(projectPath, duplicate.firstFile.name),
startLine: duplicate.firstFile.start,
endLine: duplicate.firstFile.end,
context: extractContext(duplicate.firstFile.name, duplicate.firstFile.start),
},
{
file: path.relative(projectPath, duplicate.secondFile.name),
startLine: duplicate.secondFile.start,
endLine: duplicate.secondFile.end,
context: extractContext(duplicate.secondFile.name, duplicate.secondFile.start),
},
];
duplicates.push({
id: `dup-${index + 1}`,
code,
lines,
tokens,
locations,
similarity,
type,
complexity,
});
});
return duplicates;
}
/**
* Extract code content from file between specified lines
*/
function extractCodeFromFile(filePath, startLine, endLine) {
try {
const content = fs.readFileSync(filePath, 'utf-8');
const lines = content.split('\n');
return lines
.slice(startLine - 1, endLine)
.join('\n')
.trim();
}
catch {
return 'Could not read file content';
}
}
/**
* Extract context information (function/class name) around the duplicate
*/
function extractContext(filePath, lineNumber) {
try {
const content = fs.readFileSync(filePath, 'utf-8');
const lines = content.split('\n');
// Look backwards from the duplicate line to find function/class context
for (let i = lineNumber - 2; i >= Math.max(0, lineNumber - 20); i--) {
const line = lines[i]?.trim();
if (!line)
continue;
// Look for function declarations
const functionMatch = line.match(/(?:function|const|let|var)\s+(\w+)|(\w+)\s*[:=]\s*(?:function|\()/);
if (functionMatch) {
return `function ${functionMatch[1] || functionMatch[2]}`;
}
// Look for class declarations
const classMatch = line.match(/class\s+(\w+)/);
if (classMatch) {
return `class ${classMatch[1]}`;
}
// Look for method declarations
const methodMatch = line.match(/(\w+)\s*\([^)]*\)\s*[:{]/);
if (methodMatch) {
return `method ${methodMatch[1]}`;
}
}
return undefined;
}
catch {
return undefined;
}
}
/**
* Calculate complexity score based on lines and tokens
*/
function calculateComplexity(lines, tokens) {
// Simple complexity calculation
// More lines and tokens = higher complexity
const lineScore = Math.min(lines / 10, 10); // Max 10 points for lines
const tokenScore = Math.min(tokens / 100, 10); // Max 10 points for tokens
return Math.round((lineScore + tokenScore) / 2);
}
/**
* Generate refactoring suggestions based on duplicate blocks
*/
function generateRefactoringSuggestions(duplicates) {
const suggestions = [];
duplicates.forEach((duplicate, index) => {
if (duplicate.locations.length < 2)
return;
// Determine suggestion type based on duplicate characteristics
const suggestionType = determineSuggestionType(duplicate);
// Calculate priority based on complexity and number of duplications
const priority = calculatePriority(duplicate);
// Estimate effort based on complexity
const effort = duplicate.complexity > 7
? 'high'
: duplicate.complexity > 4
? 'medium'
: 'low';
// Calculate potential impact
const potentialSavings = (duplicate.locations.length - 1) * duplicate.lines;
const impact = `Remove ${potentialSavings} lines, reduce duplication by ${duplicate.locations.length}x`;
// Generate suggested name
const suggestedName = generateSuggestedName(duplicate, suggestionType);
suggestions.push({
id: `suggestion-${index + 1}`,
priority,
type: suggestionType,
description: generateSuggestionDescription(suggestionType, duplicate),
effort,
impact,
affectedBlocks: [duplicate.id],
suggestedName,
example: generateRefactoringExample(duplicate, suggestionType, suggestedName),
});
});
// Sort by priority and impact
return suggestions.sort((a, b) => {
const priorityOrder = { high: 3, medium: 2, low: 1 };
return priorityOrder[b.priority] - priorityOrder[a.priority];
});
}
/**
* Determine the best refactoring approach for a duplicate
*/
function determineSuggestionType(duplicate) {
// Analyze the code content to suggest appropriate refactoring
const code = duplicate.code.toLowerCase();
if (code.includes('class ') || code.includes('interface ')) {
return 'extract-class';
}
if (code.includes('function ') ||
code.includes('=>') ||
duplicate.lines > 15) {
return 'extract-function';
}
if (duplicate.locations.length > 3 && duplicate.lines > 20) {
return 'extract-module';
}
return 'extract-function'; // Default
}
/**
* Calculate suggestion priority
*/
function calculatePriority(duplicate) {
const score = duplicate.complexity + duplicate.locations.length * 2 + duplicate.lines / 5;
if (score > 15)
return 'high';
if (score > 8)
return 'medium';
return 'low';
}
/**
* Generate suggested name for extracted code
*/
function generateSuggestedName(duplicate, type) {
const context = duplicate.locations[0]?.context;
const baseName = context?.includes('function') ? 'sharedLogic' : 'commonCode';
switch (type) {
case 'extract-function':
return `extract${capitalize(baseName)}`;
case 'extract-class':
return `${capitalize(baseName)}Helper`;
case 'extract-module':
return `${baseName}Module`;
default:
return baseName;
}
}
/**
* Generate description for refactoring suggestion
*/
function generateSuggestionDescription(type, duplicate) {
const locations = duplicate.locations.length;
const lines = duplicate.lines;
switch (type) {
case 'extract-function':
return `Extract ${lines} lines of duplicated code into a reusable function. Found in ${locations} locations.`;
case 'extract-class':
return `Extract duplicated class/interface pattern into a base class or shared interface. Found in ${locations} locations.`;
case 'extract-module':
return `Extract large duplicated block (${lines} lines) into a separate module. Found in ${locations} locations.`;
case 'parameterize':
return `Parameterize similar code blocks to reduce duplication. Found in ${locations} locations.`;
default:
return `Refactor duplicated code found in ${locations} locations.`;
}
}
/**
* Generate example of refactored code
*/
function generateRefactoringExample(duplicate, type, suggestedName) {
const firstLines = duplicate.code.split('\n').slice(0, 3).join('\n');
switch (type) {
case 'extract-function':
return `// Before: ${duplicate.lines} lines duplicated\n// After:\nfunction ${suggestedName}() {\n ${firstLines}\n // ... rest of logic\n}\n\n// Usage:\n${suggestedName}();`;
case 'extract-class':
return `// Before: ${duplicate.lines} lines duplicated\n// After:\nclass ${suggestedName} {\n ${firstLines}\n // ... rest of logic\n}\n\n// Usage:\nconst helper = new ${suggestedName}();`;
default:
return `// Extract into: ${suggestedName}\n${firstLines}\n// ... rest of logic`;
}
}
/**
* Calculate analysis metrics
*/
function calculateMetrics(files, duplicates, analysisTimeMs) {
// Calculate total lines across all files
let totalLines = 0;
let totalFiles = 0;
files.forEach(file => {
try {
if (fs.existsSync(file)) {
const content = fs.readFileSync(file, 'utf-8');
totalLines += content.split('\n').length;
totalFiles++;
}
}
catch {
// Skip files that can't be read
}
});
const duplicatedLines = duplicates.reduce((sum, dup) => sum + dup.lines * (dup.locations.length - 1), 0);
const duplicationPercentage = totalLines > 0 ? (duplicatedLines / totalLines) * 100 : 0;
const averageDuplicateSize = duplicates.length > 0
? duplicates.reduce((sum, dup) => sum + dup.lines, 0) / duplicates.length
: 0;
const largestDuplicateSize = duplicates.length > 0 ? Math.max(...duplicates.map(dup => dup.lines)) : 0;
// Find files with most duplications
const fileOccurrences = new Map();
duplicates.forEach(dup => {
dup.locations.forEach(loc => {
fileOccurrences.set(loc.file, (fileOccurrences.get(loc.file) || 0) + 1);
});
});
const mostDuplicatedFiles = Array.from(fileOccurrences.entries())
.sort((a, b) => b[1] - a[1])
.slice(0, 5)
.map(([file]) => file);
const potentialSavings = duplicatedLines;
return {
totalFiles,
totalLines,
totalDuplicates: duplicates.length,
duplicatedLines,
duplicationPercentage,
averageDuplicateSize,
largestDuplicateSize,
mostDuplicatedFiles,
analysisTimeMs,
potentialSavings,
};
}
/**
* Utility function to capitalize first letter
*/
function capitalize(str) {
return str.charAt(0).toUpperCase() + str.slice(1);
}
//# sourceMappingURL=duplicate-code-analyzer.js.map