UNPKG

quality-mcp

Version:

An MCP server that analyzes your codebase, with plugin support for DCD and Simian. 🏍️ "The only Zen you find on the tops of mountains is the Zen you bring up there."

447 lines (391 loc) 13.1 kB
/**
 * Simian Output Parser
 * Parses Simian's output formats into structured data optimized for AI consumption
 */
import { createLogger } from '../../utils/logger.js';

const logger = createLogger('simian-parser');

/**
 * Dependency injection function for output parser
 * @returns {Object} Dependencies object
 */
export function getDeps() {
  return {
    logger,
  };
}

/**
 * Simian Output Parser - converts Simian output to structured data
 */
export class SimianOutputParser {
  constructor() {
    // Map of supported formatter names to their bound parse implementations.
    this.formatters = {
      xml: this.parseXml.bind(this),
      plain: this.parsePlain.bind(this),
      yaml: this.parseYaml.bind(this),
    };
  }

  /**
   * Parse Simian output based on formatter type
   * @param {string} output - Raw Simian output
   * @param {string} formatter - Output format (xml, plain, yaml)
   * @param {Function} _getDeps - Dependency injection function
   * @returns {Object} Structured analysis result
   * @throws {Error} When the formatter is unsupported or parsing fails
   */
  parse(output, formatter = 'xml', _getDeps = getDeps) {
    const { logger } = _getDeps();
    logger.debug(`Parsing Simian output with formatter: ${formatter}`);

    if (!this.formatters[formatter]) {
      throw new Error(`Unsupported formatter: ${formatter}`);
    }

    try {
      const result = this.formatters[formatter](output);
      logger.debug(`Parsed ${result.duplicates?.length || 0} duplicate blocks`);
      return result;
    } catch (error) {
      logger.error('Failed to parse Simian output:', error);
      throw error;
    }
  }

  /**
   * Parse XML format output (default Simian format)
   * @param {string} xmlOutput - XML output from Simian
   * @returns {Object} Structured analysis result
   */
  parseXml(xmlOutput) {
    // Simple XML parsing - in production, you'd use a proper XML parser
    const duplicates = [];

    // Extract summary information
    const summary = this.extractSummaryFromXml(xmlOutput);

    // Extract duplicate blocks (Simian uses <set> tags)
    const duplicateMatches = xmlOutput.match(/<set[^>]*>[\s\S]*?<\/set>/g);
    if (duplicateMatches) {
      for (const match of duplicateMatches) {
        const duplicate = this.parseDuplicationBlock(match);
        if (duplicate) {
          duplicates.push(duplicate);
        }
      }
    }

    return {
      summary,
      duplicates,
      rawOutput: xmlOutput,
    };
  }

  /**
   * Extract summary information from XML
   * @param {string} xmlOutput - Raw XML output from Simian
   * @returns {Object} Summary with totalFiles, duplicateFiles, duplicateLines,
   *   duplicateBlocks, and analysisTime fields
   */
  extractSummaryFromXml(xmlOutput) {
    const summary = {
      totalFiles: 0,
      duplicateFiles: 0,
      duplicateLines: 0,
      duplicateBlocks: 0,
      analysisTime: 'unknown',
    };

    // Extract summary from Simian XML format
    const summaryMatch = xmlOutput.match(/<summary[^>]*>/);
    if (summaryMatch) {
      const summaryTag = summaryMatch[0];
      const duplicateFileCountMatch = summaryTag.match(/duplicateFileCount="(\d+)"/);
      const duplicateLineCountMatch = summaryTag.match(/duplicateLineCount="(\d+)"/);
      const duplicateBlockCountMatch = summaryTag.match(/duplicateBlockCount="(\d+)"/);
      const totalFileCountMatch = summaryTag.match(/totalFileCount="(\d+)"/);

      // BUG FIX: duplicateFileCount previously overwrote summary.totalFiles and
      // was then clobbered by totalFileCount; it now has its own field.
      if (duplicateFileCountMatch) {
        summary.duplicateFiles = Number.parseInt(duplicateFileCountMatch[1], 10);
      }
      if (duplicateLineCountMatch) {
        summary.duplicateLines = Number.parseInt(duplicateLineCountMatch[1], 10);
      }
      if (duplicateBlockCountMatch) {
        summary.duplicateBlocks = Number.parseInt(duplicateBlockCountMatch[1], 10);
      }
      if (totalFileCountMatch) {
        summary.totalFiles = Number.parseInt(totalFileCountMatch[1], 10);
      }
    }

    return summary;
  }

  /**
   * Parse individual duplication block from XML
   * @param {string} blockXml - One <set>…</set> fragment
   * @returns {Object|null} Duplicate descriptor, or null when no occurrences found
   */
  parseDuplicationBlock(blockXml) {
    const duplicate = {
      fingerprint: '',
      lineCount: 0,
      occurrences: [],
    };

    // Extract line count from Simian XML format
    const linesMatch = blockXml.match(/lineCount="(\d+)"/);
    duplicate.lineCount = linesMatch ? Number.parseInt(linesMatch[1], 10) : 0;

    // Extract fingerprint from Simian XML format; fall back to a local hash
    const fingerprintMatch = blockXml.match(/fingerprint="([^"]+)"/);
    duplicate.fingerprint = fingerprintMatch
      ? fingerprintMatch[1]
      : this.generateFingerprint(blockXml);

    // Extract file occurrences from Simian XML format (uses <block> tags)
    const blockMatches = blockXml.match(/<block[^>]*>/g);
    if (blockMatches) {
      for (const blockMatch of blockMatches) {
        const sourceFileMatch = blockMatch.match(/sourceFile="([^"]+)"/);
        const startLineMatch = blockMatch.match(/startLineNumber="(\d+)"/);
        const endLineMatch = blockMatch.match(/endLineNumber="(\d+)"/);

        if (sourceFileMatch && startLineMatch && endLineMatch) {
          duplicate.occurrences.push({
            file: sourceFileMatch[1],
            startLine: Number.parseInt(startLineMatch[1], 10),
            endLine: Number.parseInt(endLineMatch[1], 10),
          });
        }
      }
    }

    return duplicate.occurrences.length > 0 ? duplicate : null;
  }

  /**
   * Parse plain text format output
   * @param {string} plainOutput - Plain text output from Simian
   * @returns {Object} Structured analysis result
   */
  parsePlain(plainOutput) {
    const lines = plainOutput.split('\n');
    const duplicates = [];
    let currentDuplicate = null;
    const summary = {
      totalFiles: 0,
      duplicateFiles: 0,
      duplicateLines: 0,
      duplicateBlocks: 0,
      analysisTime: 'unknown',
    };

    for (const line of lines) {
      const trimmed = line.trim();

      if (trimmed.startsWith('Found') && trimmed.includes('duplicate lines')) {
        // Summary line: "Found 45 duplicate lines in 2 blocks"
        const match = trimmed.match(/Found (\d+) duplicate lines in (\d+) blocks/);
        if (match) {
          summary.duplicateLines = Number.parseInt(match[1], 10);
          summary.duplicateBlocks = Number.parseInt(match[2], 10);
        }
      } else if (trimmed.includes('duplicate lines') && trimmed.includes('in:')) {
        // Start of new duplicate block
        const linesMatch = trimmed.match(/(\d+) duplicate lines/);
        if (linesMatch) {
          currentDuplicate = {
            fingerprint: this.generateFingerprint(trimmed),
            lineCount: Number.parseInt(linesMatch[1], 10),
            occurrences: [],
          };
        }
      } else if (currentDuplicate && trimmed.includes(':') && trimmed.match(/\d+/)) {
        // File occurrence line: "path/to/file.js:45"
        const parts = trimmed.split(':');
        if (parts.length >= 2) {
          // Rejoin all but the last segment so Windows paths ("C:\…") survive.
          const filePath = parts.slice(0, -1).join(':');
          const lineNumber = Number.parseInt(parts[parts.length - 1], 10);

          currentDuplicate.occurrences.push({
            file: filePath.trim(),
            startLine: lineNumber,
            endLine: lineNumber + currentDuplicate.lineCount - 1,
          });
        }
      } else if (currentDuplicate && trimmed === '') {
        // End of current duplicate block
        if (currentDuplicate.occurrences.length > 0) {
          duplicates.push(currentDuplicate);
        }
        currentDuplicate = null;
      }
    }

    // Add last duplicate if exists
    if (currentDuplicate && currentDuplicate.occurrences.length > 0) {
      duplicates.push(currentDuplicate);
    }

    return {
      summary,
      duplicates,
      rawOutput: plainOutput,
    };
  }

  /**
   * Parse YAML format output
   * @param {string} yamlOutput - YAML output from Simian
   * @returns {Object} Structured analysis result
   */
  parseYaml(yamlOutput) {
    // Simple YAML parsing - in production, you'd use a proper YAML parser
    const duplicates = [];
    const summary = {
      totalFiles: 0,
      duplicateFiles: 0,
      duplicateLines: 0,
      duplicateBlocks: 0,
      analysisTime: 'unknown',
    };

    // Basic YAML parsing for Simian output
    const lines = yamlOutput.split('\n');
    let currentDuplicate = null;

    // Local helper: fingerprint and store a finished duplicate block.
    const flushCurrent = () => {
      if (currentDuplicate && currentDuplicate.occurrences.length > 0) {
        currentDuplicate.fingerprint = this.generateFingerprint(JSON.stringify(currentDuplicate));
        duplicates.push(currentDuplicate);
      }
      currentDuplicate = null;
    };

    for (const line of lines) {
      const trimmed = line.trim();

      if (trimmed.startsWith('- duplicate:')) {
        // BUG FIX: a new "- duplicate:" line used to silently discard any
        // still-open block; flush it first so no duplicates are lost.
        flushCurrent();
        currentDuplicate = {
          fingerprint: '',
          lineCount: 0,
          occurrences: [],
        };
      } else if (currentDuplicate && trimmed.startsWith('lines:')) {
        const match = trimmed.match(/lines:\s*(\d+)/);
        if (match) {
          currentDuplicate.lineCount = Number.parseInt(match[1], 10);
        }
      } else if (currentDuplicate && trimmed.startsWith('- file:')) {
        const fileMatch = trimmed.match(/file:\s*"?([^"]+)"?/);
        if (fileMatch) {
          currentDuplicate.occurrences.push({
            file: fileMatch[1],
            startLine: 0, // YAML format may not include line numbers
            endLine: currentDuplicate.lineCount,
          });
        }
      } else if (currentDuplicate && trimmed === '') {
        // Blank line ends the current duplicate block.
        flushCurrent();
      }
    }

    // Add last duplicate if exists
    flushCurrent();

    summary.duplicateBlocks = duplicates.length;
    summary.duplicateLines = duplicates.reduce((total, dup) => {
      return total + dup.lineCount;
    }, 0);

    return {
      summary,
      duplicates,
      rawOutput: yamlOutput,
    };
  }

  /**
   * Generate a fingerprint for duplicate identification
   * @param {string} content - Content to generate fingerprint from
   * @returns {string} Fingerprint hash
   */
  generateFingerprint(content) {
    // Simple hash function - in production, use crypto
    let hash = 0;
    for (let i = 0; i < content.length; i++) {
      const char = content.charCodeAt(i);
      hash = (hash << 5) - hash + char;
      hash = hash & hash; // Convert to 32-bit integer
    }
    return Math.abs(hash).toString(16);
  }

  /**
   * Enhance analysis results with AI-friendly metadata
   * @param {Object} analysis - Parsed analysis result
   * @returns {Object} Enhanced analysis result
   */
  enhanceForAI(analysis) {
    const enhanced = { ...analysis };

    // Add AI-friendly summary
    enhanced.aiSummary = this.generateAISummary(analysis);

    // Add prioritized recommendations
    enhanced.recommendations = this.generateRecommendations(analysis);

    // Add severity classification
    enhanced.severity = this.classifySeverity(analysis);

    return enhanced;
  }

  /**
   * Generate AI-friendly summary
   * @param {Object} analysis - Parsed analysis result
   * @returns {Object} Overview text, impact tags, and the five largest duplicates
   */
  generateAISummary(analysis) {
    const { summary, duplicates } = analysis;

    return {
      overview: `Found ${summary.duplicateBlocks} duplicate code blocks across ${summary.totalFiles} files, totaling ${summary.duplicateLines} duplicate lines.`,
      impact: this.assessImpact(analysis),
      // BUG FIX: sort a copy — Array.prototype.sort mutates in place, and the
      // caller's analysis.duplicates order must be preserved.
      topDuplicates: [...duplicates]
        .sort((a, b) => {
          return b.lineCount - a.lineCount;
        })
        .slice(0, 5)
        .map(dup => {
          return {
            size: dup.lineCount,
            occurrences: dup.occurrences.length,
            files: dup.occurrences.map(occ => {
              return occ.file;
            }),
          };
        }),
    };
  }

  /**
   * Generate actionable recommendations
   * @param {Object} analysis - Parsed analysis result
   * @returns {Array<Object>} Extract-function suggestions for large, widespread duplicates
   */
  generateRecommendations(analysis) {
    const recommendations = [];

    for (const duplicate of analysis.duplicates) {
      // Only flag substantial duplicates repeated across several files.
      if (duplicate.lineCount >= 10 && duplicate.occurrences.length >= 3) {
        recommendations.push({
          type: 'extract_function',
          priority: 'high',
          description: `Extract ${duplicate.lineCount}-line duplicate found in ${duplicate.occurrences.length} files`,
          files: duplicate.occurrences.map(occ => {
            return occ.file;
          }),
          estimatedEffort: 'medium',
        });
      }
    }

    return recommendations;
  }

  /**
   * Classify severity of duplication
   * @param {Object} analysis - Parsed analysis result
   * @returns {string} 'high' | 'medium' | 'low'
   */
  classifySeverity(analysis) {
    const { summary } = analysis;
    // Rough ratio: duplicate lines vs. an assumed average of 50 lines per file.
    const duplicateRatio = summary.duplicateLines / Math.max(summary.totalFiles * 50, 1);

    if (duplicateRatio > 0.3) {
      return 'high';
    }
    if (duplicateRatio > 0.15) {
      return 'medium';
    }
    return 'low';
  }

  /**
   * Assess impact of duplicates
   * @param {Object} analysis - Parsed analysis result
   * @returns {Array<string>} Impact tags: 'maintainability', 'consistency', 'complexity'
   */
  assessImpact(analysis) {
    const impacts = [];

    if (analysis.summary.duplicateLines > 100) {
      impacts.push('maintainability');
    }
    if (
      analysis.duplicates.some(d => {
        return d.occurrences.length > 3;
      })
    ) {
      impacts.push('consistency');
    }
    if (
      analysis.duplicates.some(d => {
        return d.lineCount > 20;
      })
    ) {
      impacts.push('complexity');
    }

    return impacts;
  }
}