quality-mcp
Version:
An MCP server that analyzes your codebase, with plugin support for DCD and Simian. 🏍️ "The only Zen you find on the tops of mountains is the Zen you bring up there."
447 lines (391 loc) • 13.1 kB
JavaScript
/**
* Simian Output Parser
* Parses Simian's output formats into structured data optimized for AI consumption
*/
import { createLogger } from '../../utils/logger.js';
const logger = createLogger('simian-parser');
/**
 * Default dependency provider for the output parser.
 *
 * Returns a fresh object on every call whose only property is the
 * module-level `logger`. Callers may pass a replacement provider with the
 * same shape wherever a `_getDeps` parameter is accepted.
 *
 * @returns {Object} Dependencies object of the form `{ logger }`
 */
export function getDeps() {
  const dependencies = { logger };
  return dependencies;
}
/**
 * Simian Output Parser - converts Simian output to structured data.
 *
 * Every `parse*` method returns `{ summary, duplicates, rawOutput }` where
 * `summary` is `{ totalFiles, duplicateLines, duplicateBlocks, analysisTime }`
 * and each entry of `duplicates` is
 * `{ fingerprint, lineCount, occurrences: [{ file, startLine, endLine }] }`.
 * `analysisTime` is never populated by this class and is always 'unknown'.
 */
export class SimianOutputParser {
  constructor() {
    // Dispatch table used by parse(); keys are the supported formatter names.
    this.formatters = {
      xml: this.parseXml.bind(this),
      plain: this.parsePlain.bind(this),
      yaml: this.parseYaml.bind(this),
    };
  }

  /**
   * Parse Simian output based on formatter type
   * @param {string} output - Raw Simian output
   * @param {string} formatter - Output format (xml, plain, yaml)
   * @param {Function} _getDeps - Dependency injection function returning `{ logger }`
   * @returns {Object} Structured analysis result
   * @throws {Error} If `formatter` is not one of the supported names
   * @throws {TypeError} If `output` is not a string
   */
  parse(output, formatter = 'xml', _getDeps = getDeps) {
    const { logger } = _getDeps();
    logger.debug(`Parsing Simian output with formatter: ${formatter}`);

    // Own-property check: a plain truthiness test would accept inherited keys
    // such as "constructor" or "toString" and call the wrong function.
    if (!Object.prototype.hasOwnProperty.call(this.formatters, formatter)) {
      throw new Error(`Unsupported formatter: ${formatter}`);
    }

    try {
      if (typeof output !== 'string') {
        const received = output === null ? 'null' : typeof output;
        throw new TypeError(`Simian output must be a string, received ${received}`);
      }
      const result = this.formatters[formatter](output);
      logger.debug(`Parsed ${result.duplicates?.length ?? 0} duplicate blocks`);
      return result;
    } catch (error) {
      logger.error('Failed to parse Simian output:', error);
      throw error;
    }
  }

  /**
   * Parse XML format output (default Simian format)
   *
   * Regex-based extraction, not a real XML parser: it looks for one
   * `<summary ...>` tag and for `<set ...>...</set>` elements.
   *
   * @param {string} xmlOutput - XML output from Simian
   * @returns {Object} Structured analysis result
   */
  parseXml(xmlOutput) {
    const summary = this.extractSummaryFromXml(xmlOutput);

    // `\b` keeps tags that merely start with "set" (e.g. <settings>) out.
    const setElements = xmlOutput.match(/<set\b[^>]*>[\s\S]*?<\/set>/g) ?? [];

    const duplicates = [];
    for (const setXml of setElements) {
      const duplicate = this.parseDuplicationBlock(setXml);
      if (duplicate) {
        duplicates.push(duplicate);
      }
    }

    return {
      summary,
      duplicates,
      rawOutput: xmlOutput,
    };
  }

  /**
   * Extract summary information from XML
   *
   * Reads numeric attributes from the first `<summary ...>` tag. `totalFiles`
   * comes from `totalFileCount`; when that attribute is absent it falls back
   * to `duplicateFileCount` (behaviour kept from the previous implementation).
   *
   * @param {string} xmlOutput - XML output from Simian
   * @returns {Object} Summary with zeroed counters when no tag/attribute is found
   */
  extractSummaryFromXml(xmlOutput) {
    const summary = {
      totalFiles: 0,
      duplicateLines: 0,
      duplicateBlocks: 0,
      analysisTime: 'unknown',
    };

    const summaryTag = xmlOutput.match(/<summary\b[^>]*>/)?.[0];
    if (!summaryTag) {
      return summary;
    }

    // Returns the attribute as an integer, or null when it is not present.
    const readCount = (attribute) => {
      const match = summaryTag.match(new RegExp(`\\b${attribute}="(\\d+)"`));
      return match ? Number.parseInt(match[1], 10) : null;
    };

    summary.totalFiles = readCount('totalFileCount') ?? readCount('duplicateFileCount') ?? 0;
    summary.duplicateLines = readCount('duplicateLineCount') ?? 0;
    summary.duplicateBlocks = readCount('duplicateBlockCount') ?? 0;

    return summary;
  }

  /**
   * Parse individual duplication block from XML
   *
   * @param {string} blockXml - Text of one `<set ...>...</set>` element
   * @returns {Object|null} The duplicate, or null when no `<block>` tag carries
   *   all of `sourceFile`, `startLineNumber` and `endLineNumber`
   */
  parseDuplicationBlock(blockXml) {
    const lineCountMatch = blockXml.match(/lineCount="(\d+)"/);
    const fingerprintMatch = blockXml.match(/fingerprint="([^"]+)"/);

    const occurrences = [];
    for (const blockTag of blockXml.match(/<block\b[^>]*>/g) ?? []) {
      const sourceFileMatch = blockTag.match(/sourceFile="([^"]+)"/);
      const startLineMatch = blockTag.match(/startLineNumber="(\d+)"/);
      const endLineMatch = blockTag.match(/endLineNumber="(\d+)"/);

      if (sourceFileMatch && startLineMatch && endLineMatch) {
        occurrences.push({
          // Attribute values are XML-escaped; paths may legitimately contain "&".
          file: this.decodeXmlEntities(sourceFileMatch[1]),
          startLine: Number.parseInt(startLineMatch[1], 10),
          endLine: Number.parseInt(endLineMatch[1], 10),
        });
      }
    }

    if (occurrences.length === 0) {
      return null;
    }

    return {
      // Fall back to a hash of the element text when no fingerprint attribute exists.
      fingerprint: fingerprintMatch ? fingerprintMatch[1] : this.generateFingerprint(blockXml),
      lineCount: lineCountMatch ? Number.parseInt(lineCountMatch[1], 10) : 0,
      occurrences,
    };
  }

  /**
   * Decode the five predefined XML entities (&amp; &lt; &gt; &quot; &apos;).
   * Internal helper; any other entity reference is left untouched.
   *
   * @param {string} text - XML-escaped attribute value
   * @returns {string} Decoded text
   */
  decodeXmlEntities(text) {
    const entities = { amp: '&', lt: '<', gt: '>', quot: '"', apos: "'" };
    // Single pass, so "&amp;lt;" correctly becomes "&lt;" rather than "<".
    return text.replace(/&(amp|lt|gt|quot|apos);/g, (_, name) => entities[name]);
  }

  /**
   * Parse plain text format output
   *
   * Recognised lines (after trimming):
   *  - summary: "Found 45 duplicate lines in 2 blocks"
   *  - block header: contains "<N> duplicate lines" and "in:"
   *  - occurrence: "path/to/file.js:45" (path, colon, digits only)
   *  - blank line: ends the current block
   * A new header also ends the current block, so blocks need not be separated
   * by blank lines. Anything else is ignored.
   *
   * NOTE(review): this is the layout the parser has always assumed; confirm it
   * matches the plain output of the Simian version actually invoked.
   *
   * @param {string} plainOutput - Plain text output from Simian
   * @returns {Object} Structured analysis result
   */
  parsePlain(plainOutput) {
    const summary = {
      totalFiles: 0,
      duplicateLines: 0,
      duplicateBlocks: 0,
      analysisTime: 'unknown',
    };
    const duplicates = [];
    let currentDuplicate = null;

    // Store the block being built (if it has occurrences) and reset state.
    const flushCurrent = () => {
      if (currentDuplicate && currentDuplicate.occurrences.length > 0) {
        // Hash the whole block: hashing only the header text gave every block
        // of the same size the same fingerprint.
        currentDuplicate.fingerprint = this.generateFingerprint(JSON.stringify(currentDuplicate));
        duplicates.push(currentDuplicate);
      }
      currentDuplicate = null;
    };

    for (const rawLine of plainOutput.split('\n')) {
      const line = rawLine.trim();

      if (line === '') {
        flushCurrent();
        continue;
      }

      if (line.startsWith('Found') && line.includes('duplicate lines')) {
        const summaryMatch = line.match(/Found (\d+) duplicate lines in (\d+) blocks/);
        if (summaryMatch) {
          summary.duplicateLines = Number.parseInt(summaryMatch[1], 10);
          summary.duplicateBlocks = Number.parseInt(summaryMatch[2], 10);
          continue;
        }
        if (!line.includes('in:')) {
          // Neither the summary nor a block header: nothing to extract.
          continue;
        }
        // Otherwise fall through: "Found N duplicate lines in:" is a header.
      }

      if (line.includes('duplicate lines') && line.includes('in:')) {
        const headerMatch = line.match(/(\d+) duplicate lines/);
        if (headerMatch) {
          flushCurrent();
          currentDuplicate = {
            fingerprint: '',
            lineCount: Number.parseInt(headerMatch[1], 10),
            occurrences: [],
          };
        }
        continue;
      }

      if (!currentDuplicate) {
        continue;
      }

      // Greedy path capture keeps colons inside the path (e.g. "C:\dir\f.js:12").
      // Requiring an all-digit suffix rejects lines such as
      // "Processing time: 0.05sec" that used to yield NaN/0 line numbers.
      const occurrenceMatch = line.match(/^(.+):\s*(\d+)$/);
      if (occurrenceMatch) {
        const startLine = Number.parseInt(occurrenceMatch[2], 10);
        currentDuplicate.occurrences.push({
          file: occurrenceMatch[1].trim(),
          startLine,
          endLine: startLine + currentDuplicate.lineCount - 1,
        });
      }
    }

    // Output may end without a trailing blank line.
    flushCurrent();

    return {
      summary,
      duplicates,
      rawOutput: plainOutput,
    };
  }

  /**
   * Parse YAML format output
   *
   * Line-oriented scan, not a real YAML parser. A line starting with
   * "- duplicate:" opens a block, "lines: N" sets its size, "- file: X" adds an
   * occurrence, and a blank line or the next "- duplicate:" closes it.
   * Occurrences get `startLine: 0` and `endLine` equal to the line count known
   * at the time the file line is read. `summary.duplicateBlocks` and
   * `summary.duplicateLines` are derived from the parsed blocks; `totalFiles`
   * stays 0.
   *
   * @param {string} yamlOutput - YAML output from Simian
   * @returns {Object} Structured analysis result
   */
  parseYaml(yamlOutput) {
    const summary = {
      totalFiles: 0,
      duplicateLines: 0,
      duplicateBlocks: 0,
      analysisTime: 'unknown',
    };
    const duplicates = [];
    let currentDuplicate = null;

    // Fingerprint and store the block being built (if it has occurrences).
    const flushCurrent = () => {
      if (currentDuplicate && currentDuplicate.occurrences.length > 0) {
        currentDuplicate.fingerprint = this.generateFingerprint(JSON.stringify(currentDuplicate));
        duplicates.push(currentDuplicate);
      }
      currentDuplicate = null;
    };

    for (const rawLine of yamlOutput.split('\n')) {
      const line = rawLine.trim();

      if (line.startsWith('- duplicate:')) {
        // Close the previous block first; without this, consecutive entries
        // with no blank line between them overwrote each other.
        flushCurrent();
        currentDuplicate = {
          fingerprint: '',
          lineCount: 0,
          occurrences: [],
        };
        continue;
      }

      if (!currentDuplicate) {
        continue;
      }

      if (line.startsWith('lines:')) {
        const linesMatch = line.match(/lines:\s*(\d+)/);
        if (linesMatch) {
          currentDuplicate.lineCount = Number.parseInt(linesMatch[1], 10);
        }
      } else if (line.startsWith('- file:')) {
        const fileMatch = line.match(/file:\s*"?([^"]+)"?/);
        if (fileMatch) {
          currentDuplicate.occurrences.push({
            file: fileMatch[1],
            startLine: 0, // YAML format may not include line numbers
            endLine: currentDuplicate.lineCount,
          });
        }
      } else if (line === '') {
        flushCurrent();
      }
    }

    // Output may end without a trailing blank line.
    flushCurrent();

    summary.duplicateBlocks = duplicates.length;
    summary.duplicateLines = duplicates.reduce((total, dup) => total + dup.lineCount, 0);

    return {
      summary,
      duplicates,
      rawOutput: yamlOutput,
    };
  }

  /**
   * Generate a fingerprint for duplicate identification
   *
   * Non-cryptographic 32-bit rolling hash (hash * 31 + charCode) over the
   * UTF-16 code units of `content`, returned as the hex form of its absolute
   * value. An empty string yields "0".
   *
   * @param {string} content - Content to generate fingerprint from
   * @returns {string} Fingerprint hash
   */
  generateFingerprint(content) {
    let hash = 0;
    for (let i = 0; i < content.length; i++) {
      // (hash << 5) - hash === hash * 31; `| 0` wraps to a signed 32-bit int.
      hash = ((hash << 5) - hash + content.charCodeAt(i)) | 0;
    }
    return Math.abs(hash).toString(16);
  }

  /**
   * Enhance analysis results with AI-friendly metadata
   *
   * Returns a shallow copy of `analysis` with `aiSummary`, `recommendations`
   * and `severity` added, and with `duplicates` ordered largest-first. The
   * input object and its `duplicates` array are not modified.
   *
   * @param {Object} analysis - Parsed analysis result
   * @returns {Object} Enhanced analysis result
   */
  enhanceForAI(analysis) {
    // Sort a copy: sorting in place would reorder the caller's array.
    const duplicates = [...analysis.duplicates].sort((a, b) => b.lineCount - a.lineCount);
    const enhanced = { ...analysis, duplicates };

    enhanced.aiSummary = this.generateAISummary(enhanced);
    enhanced.recommendations = this.generateRecommendations(enhanced);
    enhanced.severity = this.classifySeverity(enhanced);

    return enhanced;
  }

  /**
   * Generate AI-friendly summary
   *
   * @param {Object} analysis - Parsed analysis result (`summary`, `duplicates`)
   * @returns {Object} `{ overview, impact, topDuplicates }` where
   *   `topDuplicates` holds at most the five largest blocks by line count
   */
  generateAISummary(analysis) {
    const { summary, duplicates } = analysis;

    // Array.prototype.sort mutates; copy so the caller's order is preserved.
    const largestFirst = [...duplicates].sort((a, b) => b.lineCount - a.lineCount);

    return {
      overview: `Found ${summary.duplicateBlocks} duplicate code blocks across ${summary.totalFiles} files, totaling ${summary.duplicateLines} duplicate lines.`,
      impact: this.assessImpact(analysis),
      topDuplicates: largestFirst.slice(0, 5).map((dup) => ({
        size: dup.lineCount,
        occurrences: dup.occurrences.length,
        files: dup.occurrences.map((occ) => occ.file),
      })),
    };
  }

  /**
   * Generate actionable recommendations
   *
   * Emits one "extract_function" recommendation for every duplicate that is at
   * least 10 lines long and occurs at least 3 times, in input order.
   *
   * @param {Object} analysis - Parsed analysis result
   * @returns {Object[]} Recommendations (possibly empty)
   */
  generateRecommendations(analysis) {
    const recommendations = [];

    for (const duplicate of analysis.duplicates) {
      const isWorthExtracting = duplicate.lineCount >= 10 && duplicate.occurrences.length >= 3;
      if (!isWorthExtracting) {
        continue;
      }
      recommendations.push({
        type: 'extract_function',
        priority: 'high',
        description: `Extract ${duplicate.lineCount}-line duplicate found in ${duplicate.occurrences.length} files`,
        files: duplicate.occurrences.map((occ) => occ.file),
        estimatedEffort: 'medium',
      });
    }

    return recommendations;
  }

  /**
   * Classify severity of duplication
   *
   * Heuristic: duplicate lines divided by an estimated code size of 50 lines
   * per file. When `totalFiles` is 0 the divisor is clamped to 1, so any
   * non-zero amount of duplication is reported as 'high'.
   *
   * @param {Object} analysis - Parsed analysis result
   * @returns {string} 'high' (> 0.3), 'medium' (> 0.15) or 'low'
   */
  classifySeverity(analysis) {
    const { summary } = analysis;
    const assumedLinesPerFile = 50;
    const estimatedTotalLines = Math.max(summary.totalFiles * assumedLinesPerFile, 1);
    const duplicateRatio = summary.duplicateLines / estimatedTotalLines;

    if (duplicateRatio > 0.3) {
      return 'high';
    }
    if (duplicateRatio > 0.15) {
      return 'medium';
    }
    return 'low';
  }

  /**
   * Assess impact of duplicates
   *
   * @param {Object} analysis - Parsed analysis result
   * @returns {string[]} Any of 'maintainability' (more than 100 duplicate
   *   lines), 'consistency' (a block with more than 3 occurrences) and
   *   'complexity' (a block longer than 20 lines), in that order
   */
  assessImpact(analysis) {
    const impacts = [];

    if (analysis.summary.duplicateLines > 100) {
      impacts.push('maintainability');
    }
    if (analysis.duplicates.some((d) => d.occurrences.length > 3)) {
      impacts.push('consistency');
    }
    if (analysis.duplicates.some((d) => d.lineCount > 20)) {
      impacts.push('complexity');
    }

    return impacts;
  }
}