UNPKG

mcp-adr-analysis-server

Version:

MCP server for analyzing Architectural Decision Records and project architecture

422 lines 17.3 kB
/** * LLM Artifact Detection System * * Detects files and content generated by LLMs that may not be suitable for production * Focuses on development artifacts, debug scripts, and temporary files */ import { basename, dirname } from 'path'; /** * Comprehensive LLM artifact patterns */ export const LLM_ARTIFACT_PATTERNS = [ // Debug Scripts { name: 'debug-script', description: 'Debug script typically generated for troubleshooting', category: 'debug', severity: 'warning', filePattern: /^debug_.*\.(py|js|ts|sh|rb|go|php|java)$/, contentPattern: /print\s*\(\s*["'].*(?:debug|test|trace).*["']\s*\)|console\.log\s*\(\s*["'].*debug.*["']\s*\)|logging\.debug|debugger\s*;/i, locationExceptions: ['debug/', 'scripts/', 'tools/', 'dev/', 'utils/'], confidence: 0.8 }, // Test Files in Wrong Location { name: 'misplaced-test', description: 'Test file not in proper test directory', category: 'testing', severity: 'error', filePattern: /^test_.*\.(py|js|ts|rb|go|php|java)$|.*\.test\.(js|ts|py)$|.*\.spec\.(js|ts|py)$/, locationExceptions: ['tests/', 'test/', '__tests__/', 'spec/', 'specs/'], confidence: 0.9 }, // Mock/Fixture Files { name: 'mock-data', description: 'Mock data or fixture files', category: 'testing', severity: 'info', filePattern: /^mock_.*\.(json|js|ts|py|yaml|yml)$|^fixture_.*\.(json|js|ts|py|yaml|yml)$/, contentPattern: /mock|fixture|dummy.*data|fake.*data|test.*data/i, locationExceptions: ['tests/', 'test/', '__tests__/', 'fixtures/', 'mocks/', 'dev/'], confidence: 0.7 }, // Temporary Files { name: 'temporary-file', description: 'Temporary file that should not be committed', category: 'temporary', severity: 'error', filePattern: /^temp_.*|.*\.tmp$|.*\.temp$|.*\.bak$|.*\.orig$|^scratch_.*|^playground_.*/, locationExceptions: ['tmp/', 'temp/', 'dev/'], confidence: 0.9 }, // Experimental Code { name: 'experimental-code', description: 'Experimental or prototype code', category: 'experimental', severity: 'warning', filePattern: /^experiment_.*|^poc_.*|^prototype_.*|^try_.*|^attempt_.*/, contentPattern: /\/\*.*experiment.*\*\/|#.*experiment|\/\/.*experiment|TODO.*experiment/i, locationExceptions: ['experiments/', 'poc/', 'prototypes/', 'dev/', 'playground/'], confidence: 0.8 }, // Tutorial/Learning Files { name: 'tutorial-file', description: 'Tutorial or learning file with step-by-step instructions', category: 'tutorial', severity: 'info', filePattern: /^tutorial_.*|^learn_.*|^example_.*|^demo_.*|^walkthrough_.*/, contentPattern: /step\s+\d+|tutorial|walkthrough|follow.*along|this.*example.*shows/i, locationExceptions: ['tutorials/', 'examples/', 'demos/', 'docs/', 'learning/'], confidence: 0.7 }, // Documentation Drafts { name: 'documentation-draft', description: 'Draft documentation that may not be ready', category: 'documentation', severity: 'info', filePattern: /^draft_.*\.md$|^notes_.*\.md$|^wip_.*\.md$|^todo_.*\.md$/, contentPattern: /\[draft\]|\[wip\]|\[todo\]|work.*in.*progress|need.*to.*update/i, locationExceptions: ['docs/', 'documentation/', 'drafts/', 'dev/'], confidence: 0.6 }, // LLM Conversation Logs { name: 'llm-conversation', description: 'LLM conversation log or output', category: 'temporary', severity: 'warning', filePattern: /^llm_.*|^ai_.*|^claude_.*|^gpt_.*|^chat_.*\.txt$|^conversation_.*\.txt$/, contentPattern: /human:|assistant:|claude:|gpt:|user:|system:|you:|me:|i'll.*help.*you/i, locationExceptions: ['dev/', 'logs/', 'temp/'], confidence: 0.9 }, // Analysis/Report Files { name: 'analysis-report', description: 'Analysis or report file that may be temporary', category: 'temporary', severity: 'info', filePattern: /^analysis_.*|^report_.*|^output_.*|^results_.*\.(txt|md|json|csv)$/, contentPattern: /analysis.*results|generated.*on|report.*summary|output.*from/i, locationExceptions: ['reports/', 'analysis/', 'output/', 'dev/'], confidence: 0.5 }, // Configuration Experiments { name: 'config-experiment', description: 'Experimental configuration file', category: 'experimental', severity: 'warning', filePattern: /^test_.*\.config\.(js|json|yaml|yml)$|^experiment_.*\.config\.(js|json|yaml|yml)$/, contentPattern: /test.*config|experiment.*config|temporary.*config/i, locationExceptions: ['configs/', 'dev/', 'experiments/'], confidence: 0.7 }, // Utility Scripts { name: 'utility-script', description: 'Utility script that may be temporary', category: 'debug', severity: 'info', filePattern: /^util_.*|^helper_.*|^tool_.*\.(py|js|ts|sh|rb)$/, contentPattern: /quick.*script|helper.*function|utility.*tool|one.*time.*use/i, locationExceptions: ['utils/', 'helpers/', 'tools/', 'scripts/'], confidence: 0.6 }, // Code Generation Artifacts { name: 'code-generation', description: 'Generated code that may need review', category: 'experimental', severity: 'info', filePattern: /^generated_.*|^auto_.*|^codegen_.*/, contentPattern: /\/\*.*generated.*\*\/|#.*auto.*generated|\/\/.*generated|@generated/i, locationExceptions: ['generated/', 'auto/', 'codegen/', 'build/'], confidence: 0.8 }, // Learning/Practice Files { name: 'learning-file', description: 'Learning or practice file', category: 'tutorial', severity: 'info', filePattern: /^practice_.*|^learning_.*|^exercise_.*|^kata_.*/, contentPattern: /practice|exercise|kata|learning|studying|following.*tutorial/i, locationExceptions: ['practice/', 'learning/', 'exercises/', 'dev/'], confidence: 0.7 }, // Verbose Comments (LLM-style) { name: 'verbose-comments', description: 'Unusually verbose comments typical of LLM generation', category: 'documentation', severity: 'info', combinedPattern: (_fileName, content) => { // Check for excessive commenting ratio const lines = content.split('\n'); const commentLines = lines.filter(line => line.trim().startsWith('//') || line.trim().startsWith('#') || line.trim().startsWith('/*') || line.trim().startsWith('*') || line.trim().startsWith('"""') || line.trim().startsWith("'''")); // If more than 40% of lines are comments, it might be LLM-generated const commentRatio = commentLines.length / lines.length; return commentRatio > 0.4 && content.length > 500; }, locationExceptions: ['docs/', 'examples/', 'tutorials/'], confidence: 0.4 } ]; /** * Detect LLM artifacts in a file */ export function detectLLMArtifacts(filePath, content, customPatterns = []) { const fileName = basename(filePath); const dirPath = dirname(filePath); const allPatterns = [...LLM_ARTIFACT_PATTERNS, ...customPatterns]; const matches = []; // Check each pattern for (const pattern of allPatterns) { const patternMatches = checkPattern(pattern, fileName, content, filePath); matches.push(...patternMatches); } // Calculate overall confidence const overallConfidence = matches.length > 0 ? matches.reduce((sum, match) => sum + match.confidence, 0) / matches.length : 0; // Determine severity const severity = matches.length > 0 ? matches.reduce((highest, match) => { const severityOrder = { error: 3, warning: 2, info: 1 }; return severityOrder[match.pattern.severity] > severityOrder[highest] ? match.pattern.severity : highest; }, 'info') : 'info'; // Check if current location is allowed const allowedInCurrentLocation = matches.length === 0 || matches.every(match => match.pattern.locationExceptions.some(exception => dirPath.includes(exception) || dirPath.endsWith(exception.replace('/', '')))); // Generate recommendations const recommendations = generateRecommendations(matches, filePath, allowedInCurrentLocation); return { filePath, isLLMArtifact: matches.length > 0, matches, overallConfidence, severity, recommendations, allowedInCurrentLocation }; } /** * Check a specific pattern against file and content */ function checkPattern(pattern, fileName, content, filePath) { const matches = []; // Check filename pattern if (pattern.filePattern && pattern.filePattern.test(fileName)) { matches.push({ pattern, matchType: 'filename', match: fileName, confidence: pattern.confidence, suggestions: generateSuggestions(pattern, filePath, 'filename') }); } // Check content pattern if (pattern.contentPattern && content) { const contentMatches = content.match(pattern.contentPattern); if (contentMatches) { const lines = content.split('\n'); for (let i = 0; i < lines.length; i++) { if (pattern.contentPattern.test(lines[i])) { matches.push({ pattern, matchType: 'content', match: lines[i] || '', line: i + 1, context: getLineContext(lines, i, 2), confidence: pattern.confidence * 0.9, // Slightly lower confidence for content matches suggestions: generateSuggestions(pattern, filePath, 'content') }); break; // Only report first match per pattern } } } } // Check combined pattern if (pattern.combinedPattern && pattern.combinedPattern(fileName, content)) { matches.push({ pattern, matchType: 'combined', match: 'Pattern detected in file structure and content', confidence: pattern.confidence, suggestions: generateSuggestions(pattern, filePath, 'combined') }); } return matches; } /** * Get context around a line */ function getLineContext(lines, lineIndex, contextSize) { const start = Math.max(0, lineIndex - contextSize); const end = Math.min(lines.length, lineIndex + contextSize + 1); return lines.slice(start, end).join('\n'); } /** * Generate suggestions for handling LLM artifacts */ function generateSuggestions(pattern, filePath, _matchType) { const suggestions = []; const fileName = basename(filePath); // Category-specific suggestions switch (pattern.category) { case 'debug': suggestions.push(`Move ${fileName} to scripts/ or tools/ directory`); suggestions.push('Add to .gitignore if it\'s temporary'); suggestions.push('Remove debug logging before commit'); break; case 'testing': suggestions.push(`Move ${fileName} to tests/ directory`); suggestions.push('Ensure test follows project testing conventions'); break; case 'temporary': suggestions.push('Remove file if no longer needed'); suggestions.push('Add to .gitignore'); suggestions.push('Move to tmp/ directory if needed'); break; case 'experimental': suggestions.push(`Move ${fileName} to experiments/ or dev/ directory`); suggestions.push('Add documentation about experimental status'); suggestions.push('Consider if this should be in version control'); break; case 'tutorial': suggestions.push(`Move ${fileName} to examples/ or tutorials/ directory`); suggestions.push('Add to documentation structure'); break; case 'documentation': suggestions.push(`Move ${fileName} to docs/ directory`); suggestions.push('Review and finalize before committing'); break; } // Location-specific suggestions if (pattern.locationExceptions.length > 0) { const allowedDirs = pattern.locationExceptions.join(', '); suggestions.push(`Consider moving to: ${allowedDirs}`); } // Severity-specific suggestions if (pattern.severity === 'error') { suggestions.push('🚨 This file should not be committed in its current location'); } else if (pattern.severity === 'warning') { suggestions.push('⚠️ Review this file before committing'); } return suggestions; } /** * Generate recommendations for the overall result */ function generateRecommendations(matches, _filePath, allowedInCurrentLocation) { const recommendations = []; if (matches.length === 0) { return ['File appears to be a legitimate source file']; } if (allowedInCurrentLocation) { recommendations.push('✅ File is in an appropriate location for its type'); } else { recommendations.push('❌ File should be moved to an appropriate directory'); // Add specific suggestions from matches const uniqueSuggestions = new Set(); matches.forEach(match => { match.suggestions.forEach(suggestion => { if (suggestion.includes('Move') || suggestion.includes('scripts/') || suggestion.includes('tests/') || suggestion.includes('tools/')) { uniqueSuggestions.add(suggestion); } }); }); uniqueSuggestions.forEach(suggestion => { recommendations.push(suggestion); }); } // Count by severity const errorCount = matches.filter(m => m.pattern.severity === 'error').length; const warningCount = matches.filter(m => m.pattern.severity === 'warning').length; if (errorCount > 0) { recommendations.push(`🚨 ${errorCount} critical issue(s) - file should not be committed`); } if (warningCount > 0) { recommendations.push(`⚠️ ${warningCount} warning(s) - review before committing`); } // Category-specific recommendations const categories = [...new Set(matches.map(m => m.pattern.category))]; if (categories.includes('debug')) { recommendations.push('Consider if debug code is needed in repository'); } if (categories.includes('temporary')) { recommendations.push('Remove temporary files before committing'); } if (categories.includes('experimental')) { recommendations.push('Move experimental code to appropriate directory'); } return recommendations; } /** * Batch analyze multiple files */ export function batchDetectLLMArtifacts(files, customPatterns = []) { return files.map(file => detectLLMArtifacts(file.path, file.content, customPatterns)); } /** * Get summary statistics for LLM artifact detection */ export function getLLMArtifactSummary(results) { const summary = { totalFiles: results.length, artifactFiles: results.filter(r => r.isLLMArtifact).length, allowedFiles: results.filter(r => r.allowedInCurrentLocation).length, errorCount: results.filter(r => r.severity === 'error').length, warningCount: results.filter(r => r.severity === 'warning').length, infoCount: results.filter(r => r.severity === 'info').length, categorySummary: {}, topPatterns: [] }; // Count by category const patternCounts = {}; for (const result of results) { for (const match of result.matches) { const category = match.pattern.category; summary.categorySummary[category] = (summary.categorySummary[category] || 0) + 1; const patternName = match.pattern.name; patternCounts[patternName] = (patternCounts[patternName] || 0) + 1; } } // Top patterns summary.topPatterns = Object.entries(patternCounts) .sort((a, b) => b[1] - a[1]) .slice(0, 5) .map(([name, count]) => ({ name, count })); return summary; } /** * Create a custom LLM artifact pattern */ export function createLLMPattern(name, description, category, severity, options) { return { name, description, category, severity, ...(options.filePattern && { filePattern: new RegExp(options.filePattern) }), ...(options.contentPattern && { contentPattern: new RegExp(options.contentPattern, 'i') }), locationExceptions: options.locationExceptions || [], confidence: options.confidence || 0.7 }; } //# sourceMappingURL=llm-artifact-detector.js.map