mcp-adr-analysis-server
Version:
MCP server for analyzing Architectural Decision Records and project architecture
422 lines • 17.3 kB
JavaScript
/**
* LLM Artifact Detection System
*
* Detects files and content generated by LLMs that may not be suitable for production
* Focuses on development artifacts, debug scripts, and temporary files
*/
import { basename, dirname } from 'path';
/**
 * Built-in catalogue of LLM artifact patterns.
 *
 * Each entry describes one kind of artifact:
 * - `name`, `description`, `category`, `severity`: identification and reporting data.
 * - `filePattern` (optional): regex tested against the file's base name.
 * - `contentPattern` (optional): regex tested against the file content.
 * - `combinedPattern` (optional): predicate `(fileName, content) => boolean`.
 * - `locationExceptions`: directories in which such a file is considered acceptable.
 * - `confidence`: confidence assigned to a match of this pattern.
 */
export const LLM_ARTIFACT_PATTERNS = [
    // Debug Scripts
    {
        name: 'debug-script',
        description: 'Debug script typically generated for troubleshooting',
        category: 'debug',
        severity: 'warning',
        filePattern: /^debug_.*\.(py|js|ts|sh|rb|go|php|java)$/,
        contentPattern: /print\s*\(\s*["'].*(?:debug|test|trace).*["']\s*\)|console\.log\s*\(\s*["'].*debug.*["']\s*\)|logging\.debug|debugger\s*;/i,
        locationExceptions: ['debug/', 'scripts/', 'tools/', 'dev/', 'utils/'],
        confidence: 0.8
    },
    // Test Files in Wrong Location
    {
        name: 'misplaced-test',
        description: 'Test file not in proper test directory',
        category: 'testing',
        severity: 'error',
        filePattern: /^test_.*\.(py|js|ts|rb|go|php|java)$|.*\.test\.(js|ts|py)$|.*\.spec\.(js|ts|py)$/,
        locationExceptions: ['tests/', 'test/', '__tests__/', 'spec/', 'specs/'],
        confidence: 0.9
    },
    // Mock/Fixture Files
    {
        name: 'mock-data',
        description: 'Mock data or fixture files',
        category: 'testing',
        severity: 'info',
        filePattern: /^mock_.*\.(json|js|ts|py|yaml|yml)$|^fixture_.*\.(json|js|ts|py|yaml|yml)$/,
        contentPattern: /mock|fixture|dummy.*data|fake.*data|test.*data/i,
        locationExceptions: ['tests/', 'test/', '__tests__/', 'fixtures/', 'mocks/', 'dev/'],
        confidence: 0.7
    },
    // Temporary Files
    {
        name: 'temporary-file',
        description: 'Temporary file that should not be committed',
        category: 'temporary',
        severity: 'error',
        filePattern: /^temp_.*|.*\.tmp$|.*\.temp$|.*\.bak$|.*\.orig$|^scratch_.*|^playground_.*/,
        locationExceptions: ['tmp/', 'temp/', 'dev/'],
        confidence: 0.9
    },
    // Experimental Code
    {
        name: 'experimental-code',
        description: 'Experimental or prototype code',
        category: 'experimental',
        severity: 'warning',
        filePattern: /^experiment_.*|^poc_.*|^prototype_.*|^try_.*|^attempt_.*/,
        contentPattern: /\/\*.*experiment.*\*\/|#.*experiment|\/\/.*experiment|TODO.*experiment/i,
        locationExceptions: ['experiments/', 'poc/', 'prototypes/', 'dev/', 'playground/'],
        confidence: 0.8
    },
    // Tutorial/Learning Files
    {
        name: 'tutorial-file',
        description: 'Tutorial or learning file with step-by-step instructions',
        category: 'tutorial',
        severity: 'info',
        filePattern: /^tutorial_.*|^learn_.*|^example_.*|^demo_.*|^walkthrough_.*/,
        contentPattern: /step\s+\d+|tutorial|walkthrough|follow.*along|this.*example.*shows/i,
        locationExceptions: ['tutorials/', 'examples/', 'demos/', 'docs/', 'learning/'],
        confidence: 0.7
    },
    // Documentation Drafts
    {
        name: 'documentation-draft',
        description: 'Draft documentation that may not be ready',
        category: 'documentation',
        severity: 'info',
        filePattern: /^draft_.*\.md$|^notes_.*\.md$|^wip_.*\.md$|^todo_.*\.md$/,
        contentPattern: /\[draft\]|\[wip\]|\[todo\]|work.*in.*progress|need.*to.*update/i,
        locationExceptions: ['docs/', 'documentation/', 'drafts/', 'dev/'],
        confidence: 0.6
    },
    // LLM Conversation Logs
    {
        name: 'llm-conversation',
        description: 'LLM conversation log or output',
        category: 'temporary',
        severity: 'warning',
        // NOTE(review): the `.txt` suffix binds only to the `chat_` and
        // `conversation_` alternatives; the other prefixes match any extension.
        filePattern: /^llm_.*|^ai_.*|^claude_.*|^gpt_.*|^chat_.*\.txt$|^conversation_.*\.txt$/,
        contentPattern: /human:|assistant:|claude:|gpt:|user:|system:|you:|me:|i'll.*help.*you/i,
        locationExceptions: ['dev/', 'logs/', 'temp/'],
        confidence: 0.9
    },
    // Analysis/Report Files
    {
        name: 'analysis-report',
        description: 'Analysis or report file that may be temporary',
        category: 'temporary',
        severity: 'info',
        // NOTE(review): the extension group binds only to the `results_`
        // alternative - confirm whether that is intended.
        filePattern: /^analysis_.*|^report_.*|^output_.*|^results_.*\.(txt|md|json|csv)$/,
        contentPattern: /analysis.*results|generated.*on|report.*summary|output.*from/i,
        locationExceptions: ['reports/', 'analysis/', 'output/', 'dev/'],
        confidence: 0.5
    },
    // Configuration Experiments
    {
        name: 'config-experiment',
        description: 'Experimental configuration file',
        category: 'experimental',
        severity: 'warning',
        filePattern: /^test_.*\.config\.(js|json|yaml|yml)$|^experiment_.*\.config\.(js|json|yaml|yml)$/,
        contentPattern: /test.*config|experiment.*config|temporary.*config/i,
        locationExceptions: ['configs/', 'dev/', 'experiments/'],
        confidence: 0.7
    },
    // Utility Scripts
    {
        name: 'utility-script',
        description: 'Utility script that may be temporary',
        category: 'debug',
        severity: 'info',
        // NOTE(review): the extension group binds only to the `tool_`
        // alternative - confirm whether that is intended.
        filePattern: /^util_.*|^helper_.*|^tool_.*\.(py|js|ts|sh|rb)$/,
        contentPattern: /quick.*script|helper.*function|utility.*tool|one.*time.*use/i,
        locationExceptions: ['utils/', 'helpers/', 'tools/', 'scripts/'],
        confidence: 0.6
    },
    // Code Generation Artifacts
    {
        name: 'code-generation',
        description: 'Generated code that may need review',
        category: 'experimental',
        severity: 'info',
        filePattern: /^generated_.*|^auto_.*|^codegen_.*/,
        contentPattern: /\/\*.*generated.*\*\/|#.*auto.*generated|\/\/.*generated|@generated/i,
        locationExceptions: ['generated/', 'auto/', 'codegen/', 'build/'],
        confidence: 0.8
    },
    // Learning/Practice Files
    {
        name: 'learning-file',
        description: 'Learning or practice file',
        category: 'tutorial',
        severity: 'info',
        filePattern: /^practice_.*|^learning_.*|^exercise_.*|^kata_.*/,
        contentPattern: /practice|exercise|kata|learning|studying|following.*tutorial/i,
        locationExceptions: ['practice/', 'learning/', 'exercises/', 'dev/'],
        confidence: 0.7
    },
    // Verbose Comments (LLM-style)
    {
        name: 'verbose-comments',
        description: 'Unusually verbose comments typical of LLM generation',
        category: 'documentation',
        severity: 'info',
        combinedPattern: (_fileName, content) => {
            // Content can be missing (e.g. a filename-only scan); without text
            // there is no comment ratio to measure, so report no match.
            if (typeof content !== 'string') {
                return false;
            }
            // Line prefixes treated as "this line is a comment".
            const commentPrefixes = ['//', '#', '/*', '*', '"""', "'''"];
            const lines = content.split('\n');
            const commentLines = lines.filter(line => {
                const trimmed = line.trim();
                return commentPrefixes.some(prefix => trimmed.startsWith(prefix));
            });
            // If more than 40% of lines are comments, it might be LLM-generated.
            // Files of 500 characters or fewer are never flagged.
            const commentRatio = commentLines.length / lines.length;
            return commentRatio > 0.4 && content.length > 500;
        },
        locationExceptions: ['docs/', 'examples/', 'tutorials/'],
        confidence: 0.4
    }
];
/**
 * Detect LLM artifacts in a single file.
 *
 * Runs every built-in pattern plus any `customPatterns` through `checkPattern`
 * using the file's base name and content, then aggregates the matches.
 *
 * @param {string} filePath - Path of the file being analysed.
 * @param {string} content - File content, passed through to `checkPattern`.
 * @param {Array<object>} [customPatterns=[]] - Extra patterns shaped like the
 *   entries of `LLM_ARTIFACT_PATTERNS`.
 * @returns {object} Result with `filePath`, `isLLMArtifact`, `matches`,
 *   `overallConfidence` (mean match confidence, 0 when nothing matched),
 *   `severity` (highest severity among the matches, `'info'` otherwise),
 *   `recommendations` and `allowedInCurrentLocation`.
 */
export function detectLLMArtifacts(filePath, content, customPatterns = []) {
    const fileName = basename(filePath);
    const dirPath = dirname(filePath);
    const allPatterns = [...LLM_ARTIFACT_PATTERNS, ...customPatterns];
    const matches = [];
    // Check each pattern
    for (const pattern of allPatterns) {
        matches.push(...checkPattern(pattern, fileName, content, filePath));
    }
    // Overall confidence is the arithmetic mean of the individual matches.
    const overallConfidence = matches.length > 0
        ? matches.reduce((sum, match) => sum + match.confidence, 0) / matches.length
        : 0;
    // Severity is the highest one seen; unknown severities never outrank 'info'.
    const severityOrder = { error: 3, warning: 2, info: 1 };
    let severity = 'info';
    for (const match of matches) {
        if (severityOrder[match.pattern.severity] > severityOrder[severity]) {
            severity = match.pattern.severity;
        }
    }
    // Location check works on whole path segments: wrapping both sides in '/'
    // keeps 'test/' from matching directories such as 'latest' or 'contest/x',
    // and lets multi-segment exceptions like 'src/tools/' match as a unit.
    const normalizedDir = `/${dirPath.replace(/\\/g, '/')}/`;
    const isInsideException = exception => {
        const trimmed = String(exception).replace(/\\/g, '/').replace(/^\/+|\/+$/g, '');
        // An empty exception names no directory, so it cannot allow anything.
        return trimmed.length > 0 && normalizedDir.includes(`/${trimmed}/`);
    };
    // Every match must be covered by one of its own pattern's exceptions.
    const allowedInCurrentLocation = matches.length === 0 ||
        matches.every(match => match.pattern.locationExceptions.some(isInsideException));
    // Generate recommendations
    const recommendations = generateRecommendations(matches, filePath, allowedInCurrentLocation);
    return {
        filePath,
        isLLMArtifact: matches.length > 0,
        matches,
        overallConfidence,
        severity,
        recommendations,
        allowedInCurrentLocation
    };
}
/**
 * Check a single pattern against a file name and its content.
 *
 * Up to three matches can be produced, one per matcher the pattern defines:
 * `filePattern` (against the base name), `contentPattern` (first matching
 * line only) and `combinedPattern` (predicate over name and content).
 *
 * @param {object} pattern - Pattern definition (see `LLM_ARTIFACT_PATTERNS`).
 * @param {string} fileName - Base name of the file.
 * @param {string} content - File content; may be empty or missing.
 * @param {string} filePath - Full path, forwarded to `generateSuggestions`.
 * @returns {Array<object>} Match records (`pattern`, `matchType`, `match`,
 *   `confidence`, `suggestions`, plus `line` and `context` for content matches).
 */
function checkPattern(pattern, fileName, content, filePath) {
    const matches = [];
    // `RegExp.prototype.test` on a global/sticky regex resumes from `lastIndex`,
    // so a caller-supplied /g pattern would otherwise miss every second file.
    const testFromStart = (regex, text) => {
        regex.lastIndex = 0;
        const matched = regex.test(text);
        regex.lastIndex = 0;
        return matched;
    };
    // Check filename pattern
    if (pattern.filePattern && testFromStart(pattern.filePattern, fileName)) {
        matches.push({
            pattern,
            matchType: 'filename',
            match: fileName,
            confidence: pattern.confidence,
            suggestions: generateSuggestions(pattern, filePath, 'filename')
        });
    }
    // Check content pattern: a whole-content pre-check avoids splitting files
    // that cannot match, then the first matching line is reported.
    if (pattern.contentPattern && content && testFromStart(pattern.contentPattern, content)) {
        const lines = content.split('\n');
        const hitIndex = lines.findIndex(line => testFromStart(pattern.contentPattern, line));
        if (hitIndex !== -1) {
            matches.push({
                pattern,
                matchType: 'content',
                match: lines[hitIndex] || '',
                line: hitIndex + 1,
                context: getLineContext(lines, hitIndex, 2),
                confidence: pattern.confidence * 0.9, // Slightly lower confidence for content matches
                suggestions: generateSuggestions(pattern, filePath, 'content')
            });
        }
    }
    // Check combined pattern. Predicates typically call string methods on the
    // content, so missing content is handed over as an empty string.
    if (pattern.combinedPattern) {
        const text = typeof content === 'string' ? content : '';
        if (pattern.combinedPattern(fileName, text)) {
            matches.push({
                pattern,
                matchType: 'combined',
                match: 'Pattern detected in file structure and content',
                confidence: pattern.confidence,
                suggestions: generateSuggestions(pattern, filePath, 'combined')
            });
        }
    }
    return matches;
}
/**
 * Return the lines surrounding a given line, joined with newlines.
 *
 * @param {string[]} lines - All lines of the file.
 * @param {number} lineIndex - Zero-based index of the line of interest.
 * @param {number} contextSize - Number of lines to include on each side.
 * @returns {string} The window of lines, clipped to the bounds of `lines`.
 */
function getLineContext(lines, lineIndex, contextSize) {
    // Clamp the window so it never reaches before the first or past the last line.
    const windowStart = Math.max(0, lineIndex - contextSize);
    const windowEnd = Math.min(lines.length, lineIndex + contextSize + 1);
    const surrounding = lines.slice(windowStart, windowEnd);
    return surrounding.join('\n');
}
/**
 * Build the list of handling suggestions for one matched pattern.
 *
 * Suggestions are emitted in three groups, in this order: advice for the
 * pattern's category, a pointer to its allowed directories, and a closing
 * note that depends on its severity.
 *
 * @param {object} pattern - Pattern that matched (`category`, `severity`,
 *   `locationExceptions` are read).
 * @param {string} filePath - Path of the file; only its base name is used.
 * @param {string} _matchType - Kind of match; currently unused.
 * @returns {string[]} Suggestions in display order.
 */
function generateSuggestions(pattern, filePath, _matchType) {
    const fileName = basename(filePath);
    // Category-specific advice; categories not listed here get none.
    const adviceByCategory = new Map([
        ['debug', [
            `Move ${fileName} to scripts/ or tools/ directory`,
            "Add to .gitignore if it's temporary",
            'Remove debug logging before commit'
        ]],
        ['testing', [
            `Move ${fileName} to tests/ directory`,
            'Ensure test follows project testing conventions'
        ]],
        ['temporary', [
            'Remove file if no longer needed',
            'Add to .gitignore',
            'Move to tmp/ directory if needed'
        ]],
        ['experimental', [
            `Move ${fileName} to experiments/ or dev/ directory`,
            'Add documentation about experimental status',
            'Consider if this should be in version control'
        ]],
        ['tutorial', [
            `Move ${fileName} to examples/ or tutorials/ directory`,
            'Add to documentation structure'
        ]],
        ['documentation', [
            `Move ${fileName} to docs/ directory`,
            'Review and finalize before committing'
        ]]
    ]);
    const suggestions = [...(adviceByCategory.get(pattern.category) || [])];
    // Location-specific advice: list every directory the pattern tolerates.
    if (pattern.locationExceptions.length > 0) {
        suggestions.push(`Consider moving to: ${pattern.locationExceptions.join(', ')}`);
    }
    // Severity-specific closing note; 'info' patterns get none.
    if (pattern.severity === 'error') {
        suggestions.push('🚨 This file should not be committed in its current location');
    }
    else if (pattern.severity === 'warning') {
        suggestions.push('⚠️ Review this file before committing');
    }
    return suggestions;
}
/**
 * Build the file-level recommendations from all of a file's matches.
 *
 * @param {Array<object>} matches - Match records; each needs `pattern`
 *   (`severity`, `category`) and `suggestions`.
 * @param {string} _filePath - Path of the file; currently unused.
 * @param {boolean} allowedInCurrentLocation - Whether the file already sits
 *   in a directory accepted for every matched pattern.
 * @returns {string[]} Recommendations in display order: location verdict,
 *   relocation suggestions (only when misplaced), severity counts, then
 *   category reminders.
 */
function generateRecommendations(matches, _filePath, allowedInCurrentLocation) {
    if (matches.length === 0) {
        return ['File appears to be a legitimate source file'];
    }
    const recommendations = [];
    if (allowedInCurrentLocation) {
        recommendations.push('✅ File is in an appropriate location for its type');
    }
    else {
        recommendations.push('❌ File should be moved to an appropriate directory');
        // Carry over only the relocation-flavoured suggestions, each one once,
        // in the order they were first seen.
        const relocationMarkers = ['Move', 'scripts/', 'tests/', 'tools/'];
        const relocationAdvice = new Set();
        for (const { suggestions } of matches) {
            for (const suggestion of suggestions) {
                if (relocationMarkers.some(marker => suggestion.includes(marker))) {
                    relocationAdvice.add(suggestion);
                }
            }
        }
        recommendations.push(...relocationAdvice);
    }
    // Count by severity
    let errorCount = 0;
    let warningCount = 0;
    for (const { pattern } of matches) {
        if (pattern.severity === 'error') {
            errorCount += 1;
        }
        else if (pattern.severity === 'warning') {
            warningCount += 1;
        }
    }
    if (errorCount > 0) {
        recommendations.push(`🚨 ${errorCount} critical issue(s) - file should not be committed`);
    }
    if (warningCount > 0) {
        recommendations.push(`⚠️ ${warningCount} warning(s) - review before committing`);
    }
    // Category-specific reminders, always in this fixed order.
    const seenCategories = new Set(matches.map(({ pattern }) => pattern.category));
    const categoryReminders = [
        ['debug', 'Consider if debug code is needed in repository'],
        ['temporary', 'Remove temporary files before committing'],
        ['experimental', 'Move experimental code to appropriate directory']
    ];
    for (const [category, reminder] of categoryReminders) {
        if (seenCategories.has(category)) {
            recommendations.push(reminder);
        }
    }
    return recommendations;
}
/**
 * Analyse several files in one call.
 *
 * @param {Array<{path: string, content: string}>} files - Files to analyse.
 * @param {Array<object>} [customPatterns=[]] - Extra patterns applied to every file.
 * @returns {Array<object>} One `detectLLMArtifacts` result per input file,
 *   in input order.
 */
export function batchDetectLLMArtifacts(files, customPatterns = []) {
    const analyseOne = ({ path, content }) => detectLLMArtifacts(path, content, customPatterns);
    return files.map(analyseOne);
}
/**
 * Summarise a batch of detection results.
 *
 * @param {Array<object>} results - Results as returned by `detectLLMArtifacts`.
 * @returns {object} Totals (`totalFiles`, `artifactFiles`, `allowedFiles`),
 *   per-file severity counts (`errorCount`, `warningCount`, `infoCount`),
 *   `categorySummary` (match count per category) and `topPatterns` (up to
 *   five `{ name, count }` entries, most frequent first).
 */
export function getLLMArtifactSummary(results) {
    const countWhere = predicate => results.reduce((total, result) => (predicate(result) ? total + 1 : total), 0);
    const bump = (tally, key) => {
        tally[key] = (tally[key] || 0) + 1;
    };
    // Tally every individual match by category and by pattern name.
    const categorySummary = {};
    const patternCounts = {};
    for (const { matches } of results) {
        for (const { pattern } of matches) {
            bump(categorySummary, pattern.category);
            bump(patternCounts, pattern.name);
        }
    }
    // Keep the five most frequent patterns, highest count first.
    const topPatterns = Object.entries(patternCounts)
        .sort(([, countA], [, countB]) => countB - countA)
        .slice(0, 5)
        .map(([name, count]) => ({ name, count }));
    return {
        totalFiles: results.length,
        artifactFiles: countWhere(result => result.isLLMArtifact),
        allowedFiles: countWhere(result => result.allowedInCurrentLocation),
        errorCount: countWhere(result => result.severity === 'error'),
        warningCount: countWhere(result => result.severity === 'warning'),
        infoCount: countWhere(result => result.severity === 'info'),
        categorySummary,
        topPatterns
    };
}
/**
 * Create a custom LLM artifact pattern.
 *
 * @param {string} name - Pattern identifier.
 * @param {string} description - Human-readable description.
 * @param {string} category - Pattern category (e.g. `'debug'`, `'testing'`).
 * @param {string} severity - `'error'`, `'warning'` or `'info'`.
 * @param {object} [options={}] - Optional settings.
 * @param {string|RegExp} [options.filePattern] - Source for the filename regex.
 * @param {string|RegExp} [options.contentPattern] - Source for the content
 *   regex; always compiled case-insensitively.
 * @param {string[]} [options.locationExceptions] - Allowed directories
 *   (defaults to none).
 * @param {number} [options.confidence] - Match confidence; defaults to 0.7
 *   when not given as a finite number.
 * @returns {object} Pattern object shaped like the built-in patterns. The
 *   `filePattern` / `contentPattern` keys are present only when supplied.
 * @throws {SyntaxError} If a supplied pattern source is not a valid regex.
 */
export function createLLMPattern(name, description, category, severity, options = {}) {
    const pattern = { name, description, category, severity };
    if (options.filePattern) {
        pattern.filePattern = new RegExp(options.filePattern);
    }
    if (options.contentPattern) {
        pattern.contentPattern = new RegExp(options.contentPattern, 'i');
    }
    pattern.locationExceptions = options.locationExceptions || [];
    // An explicit confidence of 0 is a legitimate value and must be kept,
    // so the default applies only when no finite number was supplied.
    pattern.confidence = Number.isFinite(options.confidence) ? options.confidence : 0.7;
    return pattern;
}
//# sourceMappingURL=llm-artifact-detector.js.map