claude-flow
Version:
Ruflo - Enterprise AI agent orchestration for Claude Code. Deploy 60+ specialized agents in coordinated swarms with self-learning, fault-tolerant consensus, vector memory, and MCP integration
698 lines • 28 kB
JavaScript
/**
* Diff Classifier for Change Analysis
*/
const DEFAULT_CONFIG = {
maxDiffSize: 10000,
classifyByImpact: true,
detectRefactoring: true,
minConfidence: 0.5,
};
const CLASSIFICATION_PATTERNS = {
feature: [/^feat/, /add.*feature/, /implement/, /new.*functionality/i],
bugfix: [/^fix/, /bug/, /patch/, /resolve.*issue/i, /hotfix/i],
refactor: [/^refactor/, /restructure/, /reorganize/, /cleanup/i, /rename/i],
docs: [/^docs?/, /documentation/, /readme/i, /comment/i, /\.md$/i],
test: [/^test/, /spec/, /\.test\.[jt]sx?$/, /\.spec\.[jt]sx?$/, /__tests__/],
config: [/^config/, /\.config\./, /package\.json/, /tsconfig/, /\.env/],
style: [/^style/, /format/, /lint/, /prettier/, /eslint/],
};
const IMPACT_KEYWORDS = {
security: 3, auth: 3, payment: 3, database: 2, api: 2, core: 2,
util: 1, helper: 1, test: 0, mock: 0, fixture: 0,
};
export class DiffClassifier {
config;
ruvectorEngine = null;
useNative = false;
classificationCache = new Map();
constructor(config = {}) {
this.config = { ...DEFAULT_CONFIG, ...config };
}
async initialize() {
try {
// @ruvector/diff is optional - gracefully fallback if not installed
const ruvector = await import('@ruvector/diff').catch(() => null);
if (ruvector) {
this.ruvectorEngine = ruvector.createDiffClassifier?.(this.config);
this.useNative = !!this.ruvectorEngine;
}
}
catch {
this.useNative = false;
}
}
parseDiff(diffContent) {
const files = [];
const fileBlocks = diffContent.split(/^diff --git/m).filter(Boolean);
for (const block of fileBlocks) {
const pathMatch = block.match(/a\/(.+?)\s+b\/(.+)/);
if (!pathMatch)
continue;
const path = pathMatch[2];
const hunks = this.parseHunks(block);
const additions = hunks.reduce((sum, h) => sum + h.changes.filter(c => c.type === 'add').length, 0);
const deletions = hunks.reduce((sum, h) => sum + h.changes.filter(c => c.type === 'remove').length, 0);
const classification = this.classifyFile(path, hunks);
files.push({ path, hunks, additions, deletions, classification });
}
return files;
}
classify(files) {
const overall = this.computeOverallClassification(files);
const stats = {
totalAdditions: files.reduce((sum, f) => sum + f.additions, 0),
totalDeletions: files.reduce((sum, f) => sum + f.deletions, 0),
filesChanged: files.length,
avgConfidence: files.length > 0 ? files.reduce((sum, f) => sum + f.classification.confidence, 0) / files.length : 0,
};
return { files, overall, stats, timestamp: Date.now() };
}
classifyCommitMessage(message) {
const lowerMessage = message.toLowerCase();
for (const [type, patterns] of Object.entries(CLASSIFICATION_PATTERNS)) {
for (const pattern of patterns) {
if (pattern.test(lowerMessage))
return type;
}
}
return 'unknown';
}
getStats() {
return { useNative: this.useNative, cacheSize: this.classificationCache.size };
}
clearCache() { this.classificationCache.clear(); }
parseHunks(block) {
const hunks = [];
const hunkMatches = block.matchAll(/@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@([^\n]*)\n([\s\S]*?)(?=@@|$)/g);
for (const match of hunkMatches) {
const oldStart = parseInt(match[1], 10);
const oldLines = parseInt(match[2] || '1', 10);
const newStart = parseInt(match[3], 10);
const newLines = parseInt(match[4] || '1', 10);
const content = match[6] || '';
const changes = this.parseChanges(content, newStart);
hunks.push({ oldStart, oldLines, newStart, newLines, content, changes });
}
return hunks;
}
parseChanges(content, startLine) {
const changes = [];
const lines = content.split('\n');
let lineNumber = startLine;
for (const line of lines) {
if (line.startsWith('+')) {
changes.push({ type: 'add', lineNumber, content: line.substring(1) });
lineNumber++;
}
else if (line.startsWith('-')) {
changes.push({ type: 'remove', lineNumber: -1, content: line.substring(1) });
}
else if (line.startsWith(' ') || line === '') {
changes.push({ type: 'context', lineNumber, content: line.substring(1) || '' });
lineNumber++;
}
}
return changes;
}
classifyFile(path, hunks) {
const cacheKey = this.getCacheKey(path, hunks);
const cached = this.classificationCache.get(cacheKey);
if (cached)
return cached;
const primary = this.determinePrimaryClassification(path, hunks);
const secondary = this.determineSecondaryClassifications(path, hunks, primary);
const confidence = this.calculateConfidence(path, hunks, primary);
const impactLevel = this.determineImpactLevel(path, hunks);
const suggestedReviewers = this.suggestReviewers(path, primary, impactLevel);
const testingStrategy = this.determineTestingStrategy(path, primary, impactLevel);
const riskFactors = this.identifyRiskFactors(path, hunks, impactLevel);
const classification = { primary, secondary, confidence, impactLevel, suggestedReviewers, testingStrategy, riskFactors };
this.classificationCache.set(cacheKey, classification);
return classification;
}
getCacheKey(path, hunks) {
const hunkSummary = hunks.map(h => h.oldStart + ':' + h.newStart).join(',');
return path + ':' + hunkSummary;
}
determinePrimaryClassification(path, hunks) {
for (const [type, patterns] of Object.entries(CLASSIFICATION_PATTERNS)) {
for (const pattern of patterns) {
if (pattern.test(path))
return type;
}
}
const allContent = hunks.flatMap(h => h.changes.map(c => c.content)).join('\n').toLowerCase();
if (/function|class|interface|type\s+\w+/.test(allContent) && hunks.some(h => h.changes.filter(c => c.type === 'add').length > 10))
return 'feature';
if (/fix|bug|issue|error|exception/.test(allContent))
return 'bugfix';
if (this.config.detectRefactoring && this.isRefactoring(hunks))
return 'refactor';
return 'unknown';
}
isRefactoring(hunks) {
let totalAdds = 0, totalRemoves = 0;
for (const hunk of hunks) {
for (const change of hunk.changes) {
if (change.type === 'add')
totalAdds++;
else if (change.type === 'remove')
totalRemoves++;
}
}
const ratio = totalAdds > 0 ? totalRemoves / totalAdds : 0;
return ratio > 0.7 && ratio < 1.4 && totalAdds > 5;
}
determineSecondaryClassifications(path, hunks, primary) {
const secondary = [];
for (const [type, patterns] of Object.entries(CLASSIFICATION_PATTERNS)) {
if (type === primary)
continue;
for (const pattern of patterns) {
if (pattern.test(path)) {
secondary.push(type);
break;
}
}
}
return secondary.slice(0, 3);
}
calculateConfidence(path, hunks, primary) {
let confidence = 0.5;
for (const patterns of Object.values(CLASSIFICATION_PATTERNS)) {
for (const pattern of patterns) {
if (pattern.test(path)) {
confidence += 0.2;
break;
}
}
}
const totalChanges = hunks.reduce((sum, h) => sum + h.changes.length, 0);
if (totalChanges > 10)
confidence += 0.1;
if (totalChanges > 50)
confidence += 0.1;
if (primary !== 'unknown')
confidence += 0.1;
return Math.min(1, confidence);
}
determineImpactLevel(path, hunks) {
let score = 0;
const lowerPath = path.toLowerCase();
for (const [keyword, weight] of Object.entries(IMPACT_KEYWORDS)) {
if (lowerPath.includes(keyword))
score = Math.max(score, weight);
}
const totalChanges = hunks.reduce((sum, h) => sum + h.changes.filter(c => c.type !== 'context').length, 0);
if (totalChanges > 100)
score = Math.max(score, 2);
if (totalChanges > 300)
score = Math.max(score, 3);
if (score >= 3)
return 'critical';
if (score >= 2)
return 'high';
if (score >= 1)
return 'medium';
return 'low';
}
suggestReviewers(path, primary, impact) {
const reviewers = [];
const typeReviewers = { feature: ['tech-lead', 'product-owner'], bugfix: ['qa-engineer', 'developer'], refactor: ['senior-developer', 'architect'], docs: ['tech-writer', 'developer'], test: ['qa-engineer', 'developer'], config: ['devops', 'tech-lead'], style: ['developer'], unknown: ['developer'] };
reviewers.push(...(typeReviewers[primary] || typeReviewers.unknown));
if (impact === 'critical' || impact === 'high')
reviewers.push('security-reviewer');
if (/security|auth/.test(path))
reviewers.push('security-team');
if (/database|migration/.test(path))
reviewers.push('dba');
return [...new Set(reviewers)].slice(0, 4);
}
determineTestingStrategy(path, primary, impact) {
const strategies = [];
if (primary !== 'test')
strategies.push('unit-tests');
if (primary === 'feature')
strategies.push('integration-tests');
if (impact === 'high' || impact === 'critical') {
strategies.push('regression-tests');
strategies.push('e2e-tests');
}
if (/api|endpoint|route|handler/.test(path))
strategies.push('api-contract-tests');
if (/security|auth|crypto/.test(path))
strategies.push('security-audit');
return strategies.slice(0, 5);
}
identifyRiskFactors(path, hunks, impact) {
const risks = [];
const totalChanges = hunks.reduce((sum, h) => sum + h.changes.length, 0);
if (totalChanges > 200)
risks.push('Large change set - increased review time needed');
if (impact === 'critical')
risks.push('Critical system component - requires careful review');
if (impact === 'high')
risks.push('High-impact area - monitor after deployment');
if (/security|auth/.test(path))
risks.push('Security-sensitive code');
if (/database|migration/.test(path))
risks.push('Database changes - ensure backup strategy');
if (/config|env/.test(path))
risks.push('Configuration changes - verify all environments');
const allContent = hunks.flatMap(h => h.changes.map(c => c.content)).join('\n');
if (/TODO|FIXME|HACK/.test(allContent))
risks.push('Contains TODO/FIXME comments');
if (/password|secret|key|token/i.test(allContent))
risks.push('Potential secrets in code');
return risks.slice(0, 5);
}
computeOverallClassification(files) {
if (files.length === 0)
return { primary: 'unknown', secondary: [], confidence: 0, impactLevel: 'low', suggestedReviewers: [], testingStrategy: [], riskFactors: [] };
const primaryCounts = {};
for (const file of files) {
const p = file.classification.primary;
primaryCounts[p] = (primaryCounts[p] || 0) + 1;
}
let primary = 'unknown';
let maxCount = 0;
for (const [type, count] of Object.entries(primaryCounts)) {
if (count > maxCount) {
maxCount = count;
primary = type;
}
}
const secondaryCounts = {};
for (const file of files) {
for (const s of file.classification.secondary) {
secondaryCounts[s] = (secondaryCounts[s] || 0) + 1;
}
}
const secondary = Object.entries(secondaryCounts).sort((a, b) => b[1] - a[1]).slice(0, 3).map(([type]) => type);
const confidence = files.reduce((sum, f) => sum + f.classification.confidence, 0) / files.length;
const impactOrder = ['low', 'medium', 'high', 'critical'];
let impactLevel = 'low';
for (const file of files) {
if (impactOrder.indexOf(file.classification.impactLevel) > impactOrder.indexOf(impactLevel))
impactLevel = file.classification.impactLevel;
}
const reviewers = [...new Set(files.flatMap(f => f.classification.suggestedReviewers))].slice(0, 5);
const testingStrategy = [...new Set(files.flatMap(f => f.classification.testingStrategy))].slice(0, 5);
const riskFactors = [...new Set(files.flatMap(f => f.classification.riskFactors))].slice(0, 5);
return { primary, secondary, confidence, impactLevel, suggestedReviewers: reviewers, testingStrategy, riskFactors };
}
}
export function createDiffClassifier(config) {
return new DiffClassifier(config);
}
// ============================================================================
// Optimized Git Diff Functions
// ============================================================================
// Cache for diff results (TTL-based)
const diffCache = new Map();
const CACHE_TTL_MS = 5000; // 5 seconds - short TTL since diffs change frequently
/**
* Validate git ref to prevent command injection
* Only allows safe characters: alphanumeric, -, _, /, ., ~, ^
*/
function validateGitRef(ref) {
// Block shell metacharacters and dangerous patterns
if (!/^[a-zA-Z0-9_\-./~^@]+$/.test(ref)) {
throw new Error(`Invalid git ref: contains unsafe characters`);
}
// Block multiple dots (path traversal)
if (ref.includes('..') && !ref.match(/^[a-zA-Z0-9_\-]+\.\.\.?[a-zA-Z0-9_\-]+$/)) {
if (!/^\w+\.\.[.\w]+$/.test(ref)) {
throw new Error(`Invalid git ref: suspicious pattern`);
}
}
// Max length check
if (ref.length > 256) {
throw new Error(`Invalid git ref: too long`);
}
}
/**
* Get git diff statistics using SINGLE combined command (optimized)
* Replaces two separate git commands with one
*/
export function getGitDiffNumstat(ref = 'HEAD') {
// SECURITY: Validate git ref to prevent command injection
validateGitRef(ref);
// Check cache first
const cacheKey = `numstat:${ref}`;
const cached = diffCache.get(cacheKey);
if (cached && Date.now() - cached.timestamp < CACHE_TTL_MS) {
return cached.files;
}
const { execFileSync } = require('child_process');
try {
// SECURITY: Use execFileSync with args array instead of shell string
// This prevents command injection via the ref parameter
const numstatOutput = execFileSync('git', [
'diff', '--numstat', '--diff-filter=ACDMRTUXB', ref
], { encoding: 'utf-8', maxBuffer: 10 * 1024 * 1024 });
const statusOutput = execFileSync('git', [
'diff', '--name-status', ref
], { encoding: 'utf-8', maxBuffer: 10 * 1024 * 1024 });
const output = numstatOutput + '---STATUS---' + statusOutput;
const [numstatPart, statusPart] = output.split('---STATUS---');
// Parse status (usually smaller, parse first)
const statusMap = new Map();
if (statusPart) {
for (const line of statusPart.trim().split('\n')) {
if (!line)
continue;
const [status, ...pathParts] = line.split('\t');
const path = pathParts[pathParts.length - 1] || pathParts[0];
if (path)
statusMap.set(path, status.charAt(0));
}
}
// Parse numstat
const files = [];
if (numstatPart) {
for (const line of numstatPart.trim().split('\n')) {
if (!line)
continue;
const [addStr, delStr, path] = line.split('\t');
if (!path)
continue;
const additions = addStr === '-' ? 0 : parseInt(addStr, 10) || 0;
const deletions = delStr === '-' ? 0 : parseInt(delStr, 10) || 0;
const binary = addStr === '-' && delStr === '-';
const statusChar = statusMap.get(path) || 'M';
let status = 'modified';
switch (statusChar) {
case 'A':
status = 'added';
break;
case 'D':
status = 'deleted';
break;
case 'R':
status = 'renamed';
break;
default: status = 'modified';
}
files.push({ path, status, additions, deletions, hunks: 1, binary });
}
}
// Cache the result
diffCache.set(cacheKey, { files, timestamp: Date.now() });
return files;
}
catch {
return [];
}
}
/**
* Async version of getGitDiffNumstat for non-blocking operation
*/
export async function getGitDiffNumstatAsync(ref = 'HEAD') {
// SECURITY: Validate git ref to prevent command injection
validateGitRef(ref);
// Check cache first
const cacheKey = `numstat:${ref}`;
const cached = diffCache.get(cacheKey);
if (cached && Date.now() - cached.timestamp < CACHE_TTL_MS) {
return cached.files;
}
const { execFile } = require('child_process');
const { promisify } = require('util');
const execFileAsync = promisify(execFile);
try {
// SECURITY: Use execFile with args array instead of shell string
const { stdout: numstatOutput } = await execFileAsync('git', [
'diff', '--numstat', '--diff-filter=ACDMRTUXB', ref
], { maxBuffer: 10 * 1024 * 1024 });
const { stdout: statusOutput } = await execFileAsync('git', [
'diff', '--name-status', ref
], { maxBuffer: 10 * 1024 * 1024 });
const stdout = numstatOutput + '---STATUS---' + statusOutput;
const [numstatPart, statusPart] = stdout.split('---STATUS---');
const statusMap = new Map();
if (statusPart) {
for (const line of statusPart.trim().split('\n')) {
if (!line)
continue;
const [status, ...pathParts] = line.split('\t');
const path = pathParts[pathParts.length - 1] || pathParts[0];
if (path)
statusMap.set(path, status.charAt(0));
}
}
const files = [];
if (numstatPart) {
for (const line of numstatPart.trim().split('\n')) {
if (!line)
continue;
const [addStr, delStr, path] = line.split('\t');
if (!path)
continue;
const additions = addStr === '-' ? 0 : parseInt(addStr, 10) || 0;
const deletions = delStr === '-' ? 0 : parseInt(delStr, 10) || 0;
const binary = addStr === '-' && delStr === '-';
const statusChar = statusMap.get(path) || 'M';
let status = 'modified';
switch (statusChar) {
case 'A':
status = 'added';
break;
case 'D':
status = 'deleted';
break;
case 'R':
status = 'renamed';
break;
default: status = 'modified';
}
files.push({ path, status, additions, deletions, hunks: 1, binary });
}
}
diffCache.set(cacheKey, { files, timestamp: Date.now() });
return files;
}
catch {
return [];
}
}
/**
* Clear the diff cache (call when git state changes)
*/
export function clearDiffCache() {
diffCache.clear();
}
/**
* Assess risk for a single file
*/
export function assessFileRisk(file) {
const reasons = [];
let score = 0;
// Size-based risk
const totalChanges = file.additions + file.deletions;
if (totalChanges > 300) {
score += 30;
reasons.push('Large change size (>300 lines)');
}
else if (totalChanges > 100) {
score += 15;
reasons.push('Medium change size (>100 lines)');
}
// Path-based risk
const lowerPath = file.path.toLowerCase();
if (/security|auth|crypto|password/.test(lowerPath)) {
score += 40;
reasons.push('Security-sensitive file');
}
if (/payment|billing|transaction/.test(lowerPath)) {
score += 35;
reasons.push('Payment-related file');
}
if (/database|migration|schema/.test(lowerPath)) {
score += 25;
reasons.push('Database-related file');
}
if (/core|main|index/.test(lowerPath)) {
score += 15;
reasons.push('Core module');
}
if (/config|env|settings/.test(lowerPath)) {
score += 20;
reasons.push('Configuration file');
}
// Status-based risk
if (file.status === 'deleted') {
score += 10;
reasons.push('File deleted');
}
// Binary file risk
if (file.binary) {
score += 5;
reasons.push('Binary file');
}
let risk = 'low';
if (score >= 60)
risk = 'critical';
else if (score >= 40)
risk = 'high';
else if (score >= 20)
risk = 'medium';
return { file: file.path, risk, score: Math.min(100, score), reasons };
}
/**
* Assess overall risk from files and file risks
*/
export function assessOverallRisk(files, fileRisks) {
const breakdown = { low: 0, medium: 0, high: 0, critical: 0 };
let totalScore = 0;
for (const fr of fileRisks) {
breakdown[fr.risk]++;
totalScore += fr.score;
}
const avgScore = fileRisks.length > 0 ? totalScore / fileRisks.length : 0;
// Weight more heavily towards high/critical files
const weightedScore = avgScore + (breakdown.critical * 15) + (breakdown.high * 10);
let overall = 'low';
if (weightedScore >= 60 || breakdown.critical > 0)
overall = 'critical';
else if (weightedScore >= 40 || breakdown.high > 1)
overall = 'high';
else if (weightedScore >= 20 || breakdown.medium > 2)
overall = 'medium';
return { overall, score: Math.min(100, Math.round(weightedScore)), breakdown };
}
// Singleton classifier instance for reuse
let classifierInstance = null;
function getClassifier() {
if (!classifierInstance) {
classifierInstance = new DiffClassifier();
}
return classifierInstance;
}
/**
* Classify a diff based on files (uses singleton classifier)
*/
export function classifyDiff(files) {
const classifier = getClassifier();
const fileDiffs = files.map(f => ({
path: f.path,
hunks: [],
additions: f.additions,
deletions: f.deletions,
classification: classifier['classifyFile'](f.path, []),
}));
return classifier['computeOverallClassification'](fileDiffs);
}
/**
* Suggest reviewers based on files and risks
*/
export function suggestReviewers(files, fileRisks) {
const reviewers = new Set();
for (const file of files) {
const lowerPath = file.path.toLowerCase();
if (/security|auth|crypto/.test(lowerPath))
reviewers.add('security-team');
if (/database|migration/.test(lowerPath))
reviewers.add('dba');
if (/api|endpoint|route/.test(lowerPath))
reviewers.add('api-owner');
if (/test|spec/.test(lowerPath))
reviewers.add('qa-engineer');
if (/config|deploy|ci/.test(lowerPath))
reviewers.add('devops');
if (/ui|component|style/.test(lowerPath))
reviewers.add('frontend-lead');
if (/model|service|repository/.test(lowerPath))
reviewers.add('backend-lead');
}
// Add based on risk
const hasHighRisk = fileRisks.some(fr => fr.risk === 'high' || fr.risk === 'critical');
if (hasHighRisk) {
reviewers.add('tech-lead');
reviewers.add('senior-developer');
}
// Default reviewer
if (reviewers.size === 0) {
reviewers.add('developer');
}
return Array.from(reviewers).slice(0, 5);
}
// Analysis result cache
const analysisCache = new Map();
const ANALYSIS_CACHE_TTL_MS = 3000; // 3 seconds
/**
* Analyze a diff with full analysis (optimized with caching)
*/
export async function analyzeDiff(options) {
const ref = options.ref || 'HEAD';
// Check analysis cache (unless skipCache is true)
if (!options.skipCache) {
const cached = analysisCache.get(ref);
if (cached && Date.now() - cached.timestamp < ANALYSIS_CACHE_TTL_MS) {
return cached.result;
}
}
// Use async git diff for non-blocking operation
const files = await getGitDiffNumstatAsync(ref);
// Parallel file risk assessment for large diffs
const fileRisks = files.length > 20
? await Promise.all(files.map(f => Promise.resolve(assessFileRisk(f))))
: files.map(assessFileRisk);
const risk = assessOverallRisk(files, fileRisks);
const classification = classifyDiff(files);
const recommendedReviewers = suggestReviewers(files, fileRisks);
const totalAdditions = files.reduce((sum, f) => sum + f.additions, 0);
const totalDeletions = files.reduce((sum, f) => sum + f.deletions, 0);
const result = {
ref,
timestamp: Date.now(),
files,
risk,
classification,
summary: `${files.length} files changed (+${totalAdditions}/-${totalDeletions}), ${risk.overall} risk`,
fileRisks,
recommendedReviewers,
};
// Cache the result
analysisCache.set(ref, { result, timestamp: Date.now() });
return result;
}
/**
* Synchronous version of analyzeDiff for backward compatibility
*/
export function analyzeDiffSync(options) {
const ref = options.ref || 'HEAD';
// Check analysis cache
const cached = analysisCache.get(ref);
if (cached && Date.now() - cached.timestamp < ANALYSIS_CACHE_TTL_MS) {
return cached.result;
}
const files = getGitDiffNumstat(ref);
const fileRisks = files.map(assessFileRisk);
const risk = assessOverallRisk(files, fileRisks);
const classification = classifyDiff(files);
const recommendedReviewers = suggestReviewers(files, fileRisks);
const totalAdditions = files.reduce((sum, f) => sum + f.additions, 0);
const totalDeletions = files.reduce((sum, f) => sum + f.deletions, 0);
const result = {
ref,
timestamp: Date.now(),
files,
risk,
classification,
summary: `${files.length} files changed (+${totalAdditions}/-${totalDeletions}), ${risk.overall} risk`,
fileRisks,
recommendedReviewers,
};
analysisCache.set(ref, { result, timestamp: Date.now() });
return result;
}
/**
* Clear all diff-related caches
*/
export function clearAllDiffCaches() {
diffCache.clear();
analysisCache.clear();
classifierInstance?.clearCache();
}
//# sourceMappingURL=diff-classifier.js.map