UNPKG

@spaik/mcp-server-roi

Version:

MCP server for AI ROI prediction and tracking with Monte Carlo simulations

806 lines 33.7 kB
import { z } from 'zod';
import { createLogger } from '../utils/logger.js';
/**
 * Quality Assurance Service
 *
 * Ensures response quality, validates insights, and maintains
 * consistency across all AI-optimized outputs.
 */
// QA schemas
/** Per-dimension quality scores, each constrained to the range [0, 1]. */
export const QualityMetricsSchema = z.object({
    accuracy: z.number().min(0).max(1),
    completeness: z.number().min(0).max(1),
    clarity: z.number().min(0).max(1),
    relevance: z.number().min(0).max(1),
    consistency: z.number().min(0).max(1),
    overall_score: z.number().min(0).max(1)
});
/** A single quality problem found in a response. */
export const QualityIssueSchema = z.object({
    severity: z.enum(['critical', 'high', 'medium', 'low']),
    category: z.enum(['accuracy', 'completeness', 'clarity', 'consistency', 'logic']),
    description: z.string(),
    location: z.string().optional(),
    suggested_fix: z.string().optional()
});
/** Full report shape returned by `QualityAssurance#validateResponse`. */
export const QualityReportSchema = z.object({
    metrics: QualityMetricsSchema,
    issues: z.array(QualityIssueSchema),
    passed: z.boolean(),
    confidence: z.number().min(0).max(1),
    recommendations: z.array(z.string()),
    validation_rules_applied: z.number(),
    auto_corrections: z.array(z.object({
        issue: z.string(),
        correction: z.string()
    }))
});
/**
 * Validates, scores and (where possible) repairs structured analysis responses.
 */
export class QualityAssurance {
    logger = createLogger({ component: 'QualityAssurance' });
    // Quality thresholds
    QUALITY_THRESHOLDS = {
        minimum_overall: 0.7,
        critical_metric_minimum: 0.6,
        high_quality_target: 0.85
    };
    // Validation rules
    VALIDATION_RULES = {
        response_structure: [
            { field: 'executive_summary', required: true },
            { field: 'insights', required: true },
            { field: 'recommendations', required: true },
            { field: 'metadata', required: true }
        ],
        numeric_constraints: {
            roi_range: { min: -100, max: 10000 },
            payback_months_range: { min: 1, max: 120 },
            confidence_range: { min: 0, max: 1 },
            probability_sum: 1.0 // For scenarios
        },
        logical_rules: [
            'negative_roi_requires_justification',
            'high_roi_requires_evidence',
            'payback_exceeds_timeline_warning',
            'risk_count_matches_mitigation_count'
        ]
    };
    /**
     * Validate response quality.
     *
     * Runs the structure, data-integrity, business-logic and consistency checks,
     * derives metrics and issues from them, and applies auto-corrections.
     * NOTE: auto-corrections mutate `response` in place.
     *
     * @param {object} response - Response object to validate.
     * @param {object} [context] - Optional context; only `context.tool` is read (for logging).
     * @returns {Promise<object>} Report with metrics, issues, pass flag, confidence,
     *   recommendations, rule count and the auto-corrections that were applied.
     */
    async validateResponse(response, context) {
        this.logger.debug('Validating response quality', { tool: context?.tool });
        // Run all validation checks
        const structureValid = this.validateStructure(response);
        const dataValid = this.validateDataIntegrity(response);
        const logicValid = this.validateBusinessLogic(response, context);
        const consistencyValid = this.validateConsistency(response);
        // Calculate quality metrics
        const metrics = this.calculateQualityMetrics(response, structureValid, dataValid, logicValid, consistencyValid);
        // Identify issues
        const issues = this.collectIssues(structureValid, dataValid, logicValid, consistencyValid);
        // Apply auto-corrections where possible
        const autoCorrections = await this.applyAutoCorrections(response, issues);
        // Generate recommendations
        const recommendations = this.generateQualityRecommendations(metrics, issues);
        // Determine if response passes quality checks
        const passed = this.determineQualityPass(metrics, issues);
        return {
            metrics,
            issues,
            passed,
            confidence: this.calculateConfidence(metrics, issues),
            recommendations,
            validation_rules_applied: this.countAppliedRules(),
            auto_corrections: autoCorrections
        };
    }
    /**
     * Ensure consistency across responses.
     *
     * @param {object[]} responses - Responses to cross-check.
     * @param {string} crossCheckType - 'temporal', 'logical' or 'semantic'; any other
     *   value runs no check and therefore reports the set as consistent.
     * @returns {Promise<object>} `{ consistent, inconsistencies, alignment_score }`.
     */
    async ensureConsistency(responses, crossCheckType) {
        this.logger.debug('Checking consistency across responses', {
            count: responses.length,
            type: crossCheckType
        });
        const inconsistencies = [];
        switch (crossCheckType) {
            case 'temporal':
                inconsistencies.push(...this.checkTemporalConsistency(responses));
                break;
            case 'logical':
                inconsistencies.push(...this.checkLogicalConsistency(responses));
                break;
            case 'semantic':
                inconsistencies.push(...this.checkSemanticConsistency(responses));
                break;
        }
        const alignmentScore = this.calculateAlignmentScore(responses, inconsistencies);
        return {
            consistent: inconsistencies.length === 0,
            inconsistencies,
            alignment_score: alignmentScore
        };
    }
    /**
     * Validate insights quality.
     *
     * @param {object} insights - May carry `primary`, `risks` and `opportunities` arrays;
     *   the scorers call string methods on each entry.
     * @param {object} [sourceData] - Data the insights were derived from; may be omitted.
     * @returns {Promise<object>} `{ valid, issues, insight_quality_scores, evidence_strength }`.
     */
    async validateInsights(insights, sourceData) {
        this.logger.debug('Validating insights quality');
        const issues = [];
        const qualityScores = {};
        // Validate primary insights
        if (insights.primary) {
            insights.primary.forEach((insight, idx) => {
                const score = this.scoreInsightQuality(insight, sourceData);
                qualityScores[`primary_${idx}`] = score;
                if (score < 0.6) {
                    issues.push(`Weak primary insight at index ${idx}: lacks supporting evidence`);
                }
            });
        }
        // Validate risk insights
        if (insights.risks) {
            insights.risks.forEach((risk, idx) => {
                const score = this.scoreRiskValidity(risk, sourceData);
                qualityScores[`risk_${idx}`] = score;
                if (score < 0.5) {
                    issues.push(`Questionable risk at index ${idx}: may be overstated`);
                }
            });
        }
        // Validate opportunities
        if (insights.opportunities) {
            insights.opportunities.forEach((opp, idx) => {
                const score = this.scoreOpportunityFeasibility(opp, sourceData);
                qualityScores[`opportunity_${idx}`] = score;
                if (score < 0.5) {
                    issues.push(`Unrealistic opportunity at index ${idx}`);
                }
            });
        }
        // Calculate evidence strength
        const evidenceStrength = this.calculateEvidenceStrength(insights, sourceData);
        return {
            valid: issues.length === 0,
            issues,
            insight_quality_scores: qualityScores,
            evidence_strength: evidenceStrength
        };
    }
    /**
     * Perform semantic validation.
     *
     * Collects contradictions (severity 0.8), ambiguities (0.5), redundancies (0.3)
     * and gaps (0.6); the response is coherent when no issue has severity above 0.5.
     *
     * @param {object} response - Response to inspect.
     * @returns {Promise<object>} `{ coherent, semantic_issues, readability_score }`.
     */
    async validateSemantics(response) {
        this.logger.debug('Performing semantic validation');
        const semanticIssues = [];
        // Check for contradictions
        const contradictions = this.findContradictions(response);
        semanticIssues.push(...contradictions.map(c => ({
            type: 'contradiction',
            description: c,
            severity: 0.8
        })));
        // Check for ambiguities
        const ambiguities = this.findAmbiguities(response);
        semanticIssues.push(...ambiguities.map(a => ({
            type: 'ambiguity',
            description: a,
            severity: 0.5
        })));
        // Check for redundancies
        const redundancies = this.findRedundancies(response);
        semanticIssues.push(...redundancies.map(r => ({
            type: 'redundancy',
            description: r,
            severity: 0.3
        })));
        // Check for gaps
        const gaps = this.findSemanticGaps(response);
        semanticIssues.push(...gaps.map(g => ({
            type: 'gap',
            description: g,
            severity: 0.6
        })));
        // Calculate readability
        const readabilityScore = this.calculateReadability(response);
        return {
            coherent: semanticIssues.filter(i => i.severity > 0.5).length === 0,
            semantic_issues: semanticIssues,
            readability_score: readabilityScore
        };
    }
    /**
     * Auto-improve response quality.
     *
     * Works on a JSON deep copy, so the caller's `response` is not modified.
     *
     * @param {object} response - Original response (must be JSON-serializable).
     * @param {object} qualityReport - Report previously produced for `response`.
     * @returns {Promise<object>} `{ improved_response, improvements_made, quality_delta }`.
     */
    async autoImproveResponse(response, qualityReport) {
        this.logger.debug('Auto-improving response quality');
        let improvedResponse = JSON.parse(JSON.stringify(response));
        const improvements = [];
        // Fix critical issues first
        const criticalIssues = qualityReport.issues.filter(i => i.severity === 'critical');
        for (const issue of criticalIssues) {
            const improvement = await this.fixIssue(improvedResponse, issue);
            if (improvement.fixed) {
                improvements.push(improvement.description);
                improvedResponse = improvement.response;
            }
        }
        // Enhance weak areas
        if (qualityReport.metrics.clarity < 0.7) {
            improvedResponse = this.enhanceClarity(improvedResponse);
            improvements.push('Enhanced clarity of key messages');
        }
        if (qualityReport.metrics.completeness < 0.8) {
            improvedResponse = await this.enhanceCompleteness(improvedResponse);
            improvements.push('Added missing context and details');
        }
        // Re-validate to measure improvement
        const newReport = await this.validateResponse(improvedResponse);
        const qualityDelta = newReport.metrics.overall_score - qualityReport.metrics.overall_score;
        return {
            improved_response: improvedResponse,
            improvements_made: improvements,
            quality_delta: qualityDelta
        };
    }
    // Private validation methods
    /**
     * Check that required top-level fields exist and that the summary and
     * insights sections are objects.
     *
     * @param {object} response
     * @returns {{valid: boolean, issues: string[]}}
     */
    validateStructure(response) {
        const issues = [];
        // Check required fields
        this.VALIDATION_RULES.response_structure.forEach(rule => {
            if (rule.required && !response[rule.field]) {
                issues.push(`Missing required field: ${rule.field}`);
            }
        });
        // Check field types
        if (response.executive_summary && typeof response.executive_summary !== 'object') {
            issues.push('Executive summary must be an object');
        }
        if (response.insights && typeof response.insights !== 'object') {
            issues.push('Insights must be an object');
        }
        return { valid: issues.length === 0, issues };
    }
    /**
     * Check numeric ranges (ROI, payback) and that scenario probabilities sum to 1
     * within a 0.01 tolerance.
     *
     * @param {object} response
     * @returns {{valid: boolean, issues: string[]}}
     */
    validateDataIntegrity(response) {
        const issues = [];
        // Validate numeric ranges
        if (response.summary?.expected_roi !== undefined) {
            const roi = response.summary.expected_roi;
            const { min, max } = this.VALIDATION_RULES.numeric_constraints.roi_range;
            if (roi < min || roi > max) {
                issues.push(`ROI ${roi}% outside valid range [${min}, ${max}]`);
            }
        }
        if (response.summary?.payback_period_months !== undefined) {
            const payback = response.summary.payback_period_months;
            const { min, max } = this.VALIDATION_RULES.numeric_constraints.payback_months_range;
            if (payback < min || payback > max) {
                issues.push(`Payback period ${payback} months outside valid range [${min}, ${max}]`);
            }
        }
        // Validate probabilities sum to 1 (only an array can be summed)
        if (Array.isArray(response.scenarios)) {
            const probabilitySum = response.scenarios.reduce((sum, s) => sum + (s.probability || 0), 0);
            if (Math.abs(probabilitySum - 1) > 0.01) {
                issues.push(`Scenario probabilities sum to ${probabilitySum}, should be 1.0`);
            }
        }
        return { valid: issues.length === 0, issues };
    }
    /**
     * Apply the business rules: negative ROI needs a justification, ROI above 500
     * needs at least three evidence entries, payback must fit the timeline
     * (24 months when `timeline_months` is falsy), and risk/mitigation counts must match.
     *
     * @param {object} response
     * @param {object} [context] - Currently unused.
     * @returns {{valid: boolean, issues: string[]}}
     */
    validateBusinessLogic(response, context) {
        const issues = [];
        // Apply logical rules
        if (response.summary?.expected_roi < 0 && !response.justification) {
            issues.push('Negative ROI requires justification');
        }
        if (response.summary?.expected_roi > 500 && (!response.evidence || response.evidence.length < 3)) {
            issues.push('High ROI claims require supporting evidence');
        }
        if (response.summary?.payback_period_months > (response.timeline_months || 24)) {
            issues.push('Payback period exceeds project timeline');
        }
        if (response.insights?.risks && response.recommendations?.mitigations) {
            if (response.insights.risks.length !== response.recommendations.mitigations.length) {
                issues.push('Each risk should have a corresponding mitigation strategy');
            }
        }
        return { valid: issues.length === 0, issues };
    }
    /**
     * Check that confidence labels, confidence score, ROI, tone and risk count
     * do not contradict each other within one response.
     *
     * @param {object} response
     * @returns {{valid: boolean, issues: string[]}}
     */
    validateConsistency(response) {
        const issues = [];
        // Check internal consistency
        if (response.executive_summary?.confidence === 'high' &&
            response.metadata?.confidence_score < 0.7) {
            issues.push('Executive confidence level inconsistent with confidence score');
        }
        if (response.summary?.expected_roi > 200 &&
            response.executive_summary?.confidence === 'low') {
            issues.push('High ROI with low confidence is inconsistent');
        }
        // Check narrative consistency
        if (response.narrative?.tone === 'optimistic' &&
            response.insights?.risks?.length > 5) {
            issues.push('Optimistic tone inconsistent with high risk count');
        }
        return { valid: issues.length === 0, issues };
    }
    /**
     * Turn the validation results into the five quality metrics plus a weighted
     * overall score (accuracy 0.25, completeness 0.2, clarity 0.2, relevance 0.15,
     * consistency 0.2).
     *
     * @param {object} response
     * @param {object} structureValid - Currently unused; completeness is recomputed from `response`.
     * @param {object} dataValid
     * @param {object} logicValid - Currently unused.
     * @param {object} consistencyValid
     * @returns {object} Metrics in the shape of `QualityMetricsSchema`.
     */
    calculateQualityMetrics(response, structureValid, dataValid, logicValid, consistencyValid) {
        // Accuracy based on data validation
        const accuracy = dataValid.valid ? 1.0 : Math.max(0.3, 1 - (dataValid.issues.length * 0.1));
        // Completeness based on structure
        const requiredFields = this.VALIDATION_RULES.response_structure.filter(r => r.required);
        const presentFields = requiredFields.filter(r => response[r.field]).length;
        const completeness = presentFields / requiredFields.length;
        // Clarity based on structure and semantics
        const hasExecSummary = response.executive_summary?.headline ? 0.3 : 0;
        const hasKeyInsight = response.executive_summary?.key_insight ? 0.3 : 0;
        const hasRecommendations = response.recommendations?.next_action ? 0.4 : 0;
        const clarity = hasExecSummary + hasKeyInsight + hasRecommendations;
        // Relevance (simplified - would need context in real implementation)
        const relevance = response.insights?.primary?.length > 0 ? 0.9 : 0.5;
        // Consistency
        const consistency = consistencyValid.valid ? 1.0 : Math.max(0.4, 1 - (consistencyValid.issues.length * 0.15));
        // Overall score
        const overall = (accuracy * 0.25 +
            completeness * 0.2 +
            clarity * 0.2 +
            relevance * 0.15 +
            consistency * 0.2);
        return {
            accuracy,
            completeness,
            clarity,
            relevance,
            consistency,
            overall_score: overall
        };
    }
    /**
     * Convert the string issues of each validation pass into structured issue
     * objects with severity, category and a suggested fix.
     *
     * @param {object} structureValid
     * @param {object} dataValid
     * @param {object} logicValid
     * @param {object} consistencyValid
     * @returns {object[]} Issues in the shape of `QualityIssueSchema`.
     */
    collectIssues(structureValid, dataValid, logicValid, consistencyValid) {
        const issues = [];
        // Structure issues
        structureValid.issues.forEach((issue) => {
            // Only "Missing required field: <name>" messages carry a field name after
            // the colon; type-mismatch messages have none, so they get their own hint
            // instead of "Add missing undefined to response".
            const fieldName = issue.split(':')[1]?.trim();
            issues.push({
                severity: issue.includes('required') ? 'critical' : 'high',
                category: 'completeness',
                description: issue,
                suggested_fix: fieldName
                    ? `Add missing ${fieldName} to response`
                    : 'Correct the type of the reported field'
            });
        });
        // Data issues
        dataValid.issues.forEach((issue) => {
            issues.push({
                severity: issue.includes('range') ? 'high' : 'medium',
                category: 'accuracy',
                description: issue,
                suggested_fix: 'Verify and correct the data values'
            });
        });
        // Logic issues
        logicValid.issues.forEach((issue) => {
            issues.push({
                severity: issue.includes('requires') ? 'high' : 'medium',
                category: 'logic',
                description: issue,
                suggested_fix: this.getSuggestedFix(issue)
            });
        });
        // Consistency issues
        consistencyValid.issues.forEach((issue) => {
            issues.push({
                severity: 'medium',
                category: 'consistency',
                description: issue,
                suggested_fix: 'Align all related fields for consistency'
            });
        });
        return issues;
    }
    /**
     * Repair what can be repaired mechanically. Mutates `response` in place.
     *
     * @param {object} response
     * @param {object[]} issues - Structured issues from `collectIssues`.
     * @returns {Promise<Array<{issue: string, correction: string}>>} Corrections actually applied.
     */
    async applyAutoCorrections(response, issues) {
        const corrections = [];
        // Auto-fix probability sum
        const probSumIssue = issues.find(i => i.description.includes('probabilities sum'));
        if (probSumIssue && Array.isArray(response.scenarios)) {
            // Treat a missing probability as 0, matching validateDataIntegrity.
            const sum = response.scenarios.reduce((s, sc) => s + (sc.probability || 0), 0);
            // A zero or non-finite sum cannot be normalized (it would yield NaN or
            // Infinity), so leave the data untouched and report no correction.
            if (Number.isFinite(sum) && sum > 0) {
                response.scenarios.forEach((sc) => {
                    sc.probability = (sc.probability || 0) / sum;
                });
                corrections.push({
                    issue: 'Probability sum normalization',
                    correction: 'Normalized scenario probabilities to sum to 1.0'
                });
            }
        }
        // Add missing confidence score (an explicit score of 0 is kept, not overwritten)
        const existingScore = response.metadata?.confidence_score;
        const scoreMissing = existingScore === undefined || existingScore === null;
        if (scoreMissing && response.executive_summary?.confidence) {
            const confidenceMap = { high: 0.85, medium: 0.7, low: 0.5 };
            response.metadata = response.metadata || {};
            response.metadata.confidence_score = confidenceMap[response.executive_summary.confidence] || 0.7;
            corrections.push({
                issue: 'Missing confidence score',
                correction: 'Added confidence score based on executive summary'
            });
        }
        return corrections;
    }
    /**
     * Produce human-readable recommendations from metric thresholds and issue severities.
     *
     * @param {object} metrics
     * @param {object[]} issues
     * @returns {string[]}
     */
    generateQualityRecommendations(metrics, issues) {
        const recommendations = [];
        if (metrics.accuracy < 0.8) {
            recommendations.push('Verify all numeric calculations and data sources');
        }
        if (metrics.completeness < 0.8) {
            recommendations.push('Add missing sections to provide complete analysis');
        }
        if (metrics.clarity < 0.7) {
            recommendations.push('Simplify language and add executive summary');
        }
        if (issues.filter(i => i.severity === 'critical').length > 0) {
            recommendations.push('Address critical issues before using this response');
        }
        if (metrics.overall_score > 0.85) {
            recommendations.push('Response meets high quality standards');
        }
        return recommendations;
    }
    /**
     * Decide pass/fail: fails on any critical issue, on an overall score below
     * `minimum_overall`, or on any numeric metric below `critical_metric_minimum`.
     *
     * @param {object} metrics
     * @param {object[]} issues
     * @returns {boolean}
     */
    determineQualityPass(metrics, issues) {
        // Fail if any critical issues
        if (issues.some(i => i.severity === 'critical')) {
            return false;
        }
        // Fail if overall score too low
        if (metrics.overall_score < this.QUALITY_THRESHOLDS.minimum_overall) {
            return false;
        }
        // Fail if any metric critically low
        const metricValues = Object.values(metrics).filter(v => typeof v === 'number');
        if (metricValues.some(v => v < this.QUALITY_THRESHOLDS.critical_metric_minimum)) {
            return false;
        }
        return true;
    }
    /**
     * Derive a confidence value from the overall score, penalized per issue
     * (floor 0.1) and boosted by 10% (cap 0.95) for high-quality responses.
     *
     * @param {object} metrics
     * @param {object[]} issues
     * @returns {number}
     */
    calculateConfidence(metrics, issues) {
        let confidence = metrics.overall_score;
        // Reduce confidence for issues
        const issuePenalty = issues.reduce((penalty, issue) => {
            switch (issue.severity) {
                case 'critical': return penalty + 0.2;
                case 'high': return penalty + 0.1;
                case 'medium': return penalty + 0.05;
                case 'low': return penalty + 0.02;
                // Unknown severities add no penalty; without this branch the reducer
                // would return undefined and the confidence would become NaN.
                default: return penalty;
            }
        }, 0);
        confidence = Math.max(0.1, confidence - issuePenalty);
        // Boost confidence for high quality
        if (metrics.overall_score > this.QUALITY_THRESHOLDS.high_quality_target) {
            confidence = Math.min(0.95, confidence * 1.1);
        }
        return confidence;
    }
    /**
     * Count the configured validation rules (structure + numeric + logical).
     *
     * @returns {number}
     */
    countAppliedRules() {
        return this.VALIDATION_RULES.response_structure.length +
            Object.keys(this.VALIDATION_RULES.numeric_constraints).length +
            this.VALIDATION_RULES.logical_rules.length;
    }
    // Consistency checking methods
    /**
     * Flag ROI jumps of more than 50 between consecutive responses that carry no
     * `change_justification`.
     *
     * @param {object[]} responses - Assumed to be in chronological order — TODO confirm with callers.
     * @returns {object[]} Inconsistency records.
     */
    checkTemporalConsistency(responses) {
        const inconsistencies = [];
        // Check if metrics improve or degrade logically over time
        for (let i = 1; i < responses.length; i++) {
            const prev = responses[i - 1];
            const curr = responses[i];
            const prevRoi = prev?.summary?.expected_roi;
            const currRoi = curr?.summary?.expected_roi;
            // Compare whenever both values are present; an ROI of exactly 0 is a
            // real data point and must not be skipped.
            const bothPresent = prevRoi !== undefined && prevRoi !== null &&
                currRoi !== undefined && currRoi !== null;
            if (bothPresent) {
                const roiDelta = Math.abs(currRoi - prevRoi);
                if (roiDelta > 50 && !curr.change_justification) {
                    inconsistencies.push({
                        type: 'temporal_metric_jump',
                        description: `ROI changed by ${roiDelta}% without justification`,
                        affected_responses: [i - 1, i],
                        severity: 'high'
                    });
                }
            }
        }
        return inconsistencies;
    }
    /**
     * Flag responses whose ROI exceeds 200 while listing fewer than two risks.
     *
     * @param {object[]} responses
     * @returns {object[]} Inconsistency records.
     */
    checkLogicalConsistency(responses) {
        const inconsistencies = [];
        responses.forEach((response, idx) => {
            // Check if high ROI aligns with low risk
            // NOTE(review): a response with no `insights.risks` array at all is not
            // flagged (undefined < 2 is false) — confirm whether that is intended.
            if (response.summary?.expected_roi > 200 &&
                response.insights?.risks?.length < 2) {
                inconsistencies.push({
                    type: 'risk_return_mismatch',
                    description: 'High ROI with unrealistically low risk profile',
                    affected_responses: [idx],
                    severity: 'medium'
                });
            }
        });
        return inconsistencies;
    }
    /**
     * Flag key terms that appear in fewer than half of the responses.
     *
     * @param {object[]} responses
     * @returns {object[]} Inconsistency records.
     */
    checkSemanticConsistency(responses) {
        const inconsistencies = [];
        // Check if terminology is used consistently
        const termUsage = new Map();
        responses.forEach((response, idx) => {
            const terms = this.extractKeyTerms(response);
            terms.forEach(term => {
                if (!termUsage.has(term)) {
                    termUsage.set(term, new Set());
                }
                termUsage.get(term).add(idx);
            });
        });
        // Flag terms that appear in some but not all relevant responses
        termUsage.forEach((indices, term) => {
            if (indices.size < responses.length * 0.5 && indices.size > 0) {
                inconsistencies.push({
                    type: 'terminology_inconsistency',
                    description: `Term "${term}" used inconsistently across responses`,
                    affected_responses: Array.from(indices),
                    severity: 'low'
                });
            }
        });
        return inconsistencies;
    }
    /**
     * Score alignment as 1 minus a severity-weighted penalty, floored at 0.
     *
     * @param {object[]} responses - Currently unused.
     * @param {object[]} inconsistencies
     * @returns {number}
     */
    calculateAlignmentScore(responses, inconsistencies) {
        const baseScore = 1.0;
        const penalty = inconsistencies.reduce((sum, inc) => {
            switch (inc.severity) {
                case 'high': return sum + 0.15;
                case 'medium': return sum + 0.08;
                case 'low': return sum + 0.03;
                default: return sum;
            }
        }, 0);
        return Math.max(0, baseScore - penalty);
    }
    // Insight validation methods
    /**
     * Score a primary insight: base 0.5, plus bonuses for containing digits,
     * mentioning ROI/payback when source data has a summary, and actionable wording.
     *
     * @param {string} insight
     * @param {object} [sourceData] - May be omitted; data-dependent bonuses are then skipped.
     * @returns {number} Score capped at 1.
     */
    scoreInsightQuality(insight, sourceData) {
        let score = 0.5; // Base score
        const lowered = insight.toLowerCase();
        // Check if insight is specific (contains numbers)
        if (/\d+/.test(insight))
            score += 0.2;
        // Check if insight references source data
        if (sourceData?.summary && lowered.includes('roi'))
            score += 0.1;
        if (sourceData?.summary && lowered.includes('payback'))
            score += 0.1;
        // Check if insight is actionable
        const actionableWords = ['should', 'could', 'recommend', 'consider', 'implement'];
        if (actionableWords.some(word => lowered.includes(word))) {
            score += 0.1;
        }
        return Math.min(1, score);
    }
    /**
     * Score a risk statement: base 0.5, bonus for more than five words and for
     * mentioning complexity on projects with more than five use cases, penalty
     * when the whole text is a single generic keyword.
     *
     * @param {string} risk
     * @param {object} [sourceData] - May be omitted.
     * @returns {number} Score clamped to [0, 1].
     */
    scoreRiskValidity(risk, sourceData) {
        let score = 0.5;
        const lowered = risk.toLowerCase();
        // Check if risk is specific
        if (risk.split(' ').length > 5)
            score += 0.2;
        // Check if risk aligns with project characteristics
        if (sourceData?.use_cases?.length > 5 && lowered.includes('complex')) {
            score += 0.2;
        }
        // Penalize generic risks
        const genericRisks = ['implementation', 'adoption', 'technical'];
        if (genericRisks.some(g => lowered === g)) {
            score -= 0.2;
        }
        return Math.max(0, Math.min(1, score));
    }
    /**
     * Score an opportunity: base 0.5, bonus when quantified with a percentage and
     * when it mentions a use-case category, penalty for over-ambitious wording.
     *
     * @param {string} opportunity
     * @param {object} [sourceData] - May be omitted.
     * @returns {number} Score clamped to [0, 1].
     */
    scoreOpportunityFeasibility(opportunity, sourceData) {
        let score = 0.5;
        const lowered = opportunity.toLowerCase();
        // Check if opportunity is quantified
        if (/\d+%/.test(opportunity))
            score += 0.2;
        // Check if opportunity aligns with capabilities. The category is lower-cased
        // to match the lower-cased text, and empty or non-string categories are
        // skipped so they cannot match spuriously.
        const matchesUseCase = sourceData?.use_cases?.some((uc) => typeof uc?.category === 'string' &&
            uc.category.length > 0 &&
            lowered.includes(uc.category.toLowerCase()));
        if (matchesUseCase) {
            score += 0.2;
        }
        // Check if opportunity is realistic
        if (lowered.includes('10x') || lowered.includes('revolutionary')) {
            score -= 0.3; // Too ambitious
        }
        return Math.max(0, Math.min(1, score));
    }
    /**
     * Fraction of primary insights that `hasDataSupport` considers data-backed;
     * 0 when there are no primary insights.
     *
     * @param {object} insights
     * @param {object} [sourceData]
     * @returns {number}
     */
    calculateEvidenceStrength(insights, sourceData) {
        let evidencePoints = 0;
        let totalPoints = 0;
        // Check if insights are backed by data
        if (insights.primary) {
            totalPoints += insights.primary.length;
            insights.primary.forEach((insight) => {
                if (this.hasDataSupport(insight, sourceData)) {
                    evidencePoints++;
                }
            });
        }
        return totalPoints > 0 ? evidencePoints / totalPoints : 0;
    }
    // Semantic validation methods
    /**
     * Find contradictory statements (confidence vs. risks, ROI vs. payback).
     *
     * @param {object} response
     * @returns {string[]}
     */
    findContradictions(response) {
        const contradictions = [];
        // Check confidence vs risk contradictions
        if (response.executive_summary?.confidence === 'high' &&
            response.insights?.risks?.length > 5) {
            contradictions.push('High confidence contradicts numerous risks');
        }
        // Check ROI vs payback contradictions
        if (response.summary?.expected_roi > 200 &&
            response.summary?.payback_period_months > 36) {
            contradictions.push('Very high ROI contradicts long payback period');
        }
        return contradictions;
    }
    /**
     * Find vague recommendations: a next action under five words, or a timeline
     * without any digits.
     *
     * @param {object} response
     * @returns {string[]}
     */
    findAmbiguities(response) {
        const ambiguities = [];
        // Check for vague recommendations
        if (response.recommendations?.next_action &&
            response.recommendations.next_action.split(' ').length < 5) {
            ambiguities.push('Next action recommendation is too vague');
        }
        // Check for unclear timelines
        if (response.recommendations?.timeline &&
            !response.recommendations.timeline.match(/\d+/)) {
            ambiguities.push('Timeline lacks specific duration');
        }
        return ambiguities;
    }
    /**
     * Detect primary insights that repeat the executive key insight
     * (case-insensitive substring match in either direction).
     *
     * @param {object} response
     * @returns {string[]}
     */
    findRedundancies(response) {
        const redundancies = [];
        // Check if insights repeat information
        if (response.insights?.primary && response.executive_summary?.key_insight) {
            const execInsight = response.executive_summary.key_insight.toLowerCase();
            const duplicates = response.insights.primary.filter((i) => i.toLowerCase().includes(execInsight) || execInsight.includes(i.toLowerCase()));
            if (duplicates.length > 0) {
                redundancies.push('Primary insights duplicate executive summary');
            }
        }
        return redundancies;
    }
    /**
     * Detect risks without mitigations and opportunities without a next action.
     *
     * @param {object} response
     * @returns {string[]}
     */
    findSemanticGaps(response) {
        const gaps = [];
        // Check if risks have mitigations
        if (response.insights?.risks?.length > 0 &&
            (!response.recommendations?.mitigations ||
                response.recommendations.mitigations.length === 0)) {
            gaps.push('Risks identified but no mitigation strategies provided');
        }
        // Check if opportunities have action plans
        if (response.insights?.opportunities?.length > 0 &&
            !response.recommendations?.next_action) {
            gaps.push('Opportunities identified but no action plan provided');
        }
        return gaps;
    }
    /**
     * Simplified readability score based on which structural elements are present.
     *
     * @param {object} response
     * @returns {number} Score between 0.5 and 1.
     */
    calculateReadability(response) {
        // Simplified readability score based on structure
        let score = 0.5;
        if (response.executive_summary?.headline)
            score += 0.2;
        if (response.narrative?.style === 'conversational')
            score += 0.1;
        if (response.recommendations?.success_criteria?.length > 0)
            score += 0.1;
        if (response.metadata?.complexity === 'simple')
            score += 0.1;
        return Math.min(1, score);
    }
    // Auto-improvement methods
    /**
     * Try to fix one issue by mutating `response` in place.
     *
     * @param {object} response
     * @param {object} issue - Structured issue; `category` and `description` are read.
     * @returns {Promise<{fixed: boolean, response: object, description: string}>}
     */
    async fixIssue(response, issue) {
        let fixed = false;
        let description = '';
        switch (issue.category) {
            case 'completeness':
                if (issue.description.includes('executive_summary')) {
                    response.executive_summary = response.executive_summary || {
                        headline: 'Analysis Complete',
                        confidence: 'medium',
                        key_insight: response.insights?.primary?.[0] || 'See detailed analysis'
                    };
                    fixed = true;
                    description = 'Added missing executive summary';
                }
                break;
            case 'accuracy':
                if (issue.description.includes('ROI') && issue.description.includes('range')) {
                    // Clamp ROI to the configured valid range so this stays in sync
                    // with the limits validateDataIntegrity enforces.
                    const roi = response.summary?.expected_roi;
                    if (roi !== undefined && roi !== null) {
                        const { min, max } = this.VALIDATION_RULES.numeric_constraints.roi_range;
                        response.summary.expected_roi = Math.max(min, Math.min(max, roi));
                        fixed = true;
                        description = 'Adjusted ROI to valid range';
                    }
                }
                break;
            case 'logic':
                if (issue.description.includes('Negative ROI requires justification')) {
                    response.justification = 'Strategic investment for future capabilities';
                    fixed = true;
                    description = 'Added justification for negative ROI';
                }
                break;
        }
        return { fixed, response, description };
    }
    /**
     * Add a generated headline when missing and truncate primary insights longer
     * than 100 characters to 80 characters plus an ellipsis. Mutates `response`.
     *
     * @param {object} response
     * @returns {object} The same `response` object.
     */
    enhanceClarity(response) {
        // Add clear structure if missing
        if (!response.executive_summary?.headline && response.summary) {
            response.executive_summary = response.executive_summary || {};
            response.executive_summary.headline =
                `${response.summary.expected_roi}% ROI with ${response.summary.payback_period_months}-month payback`;
        }
        // Simplify complex insights
        if (response.insights?.primary) {
            response.insights.primary = response.insights.primary.map((insight) => {
                if (insight.length > 100) {
                    // Shorten to key point
                    return insight.substring(0, 80) + '...';
                }
                return insight;
            });
        }
        return response;
    }
    /**
     * Fill in default recommendations and metadata when absent. Mutates `response`.
     *
     * @param {object} response
     * @returns {Promise<object>} The same `response` object.
     */
    async enhanceCompleteness(response) {
        // Add missing recommendations
        if (!response.recommendations && response.insights) {
            response.recommendations = {
                next_action: response.insights.opportunities?.[0] ?
                    `Pursue ${response.insights.opportunities[0]}` :
                    'Review detailed analysis for opportunities',
                timeline: 'Begin within 30 days',
                success_criteria: ['Achieve projected ROI', 'Meet timeline targets']
            };
        }
        // Add missing metadata
        if (!response.metadata) {
            response.metadata = {
                generated_at: new Date().toISOString(),
                confidence_score: 0.75,
                data_quality: 'good'
            };
        }
        return response;
    }
    // Helper methods
    /**
     * Map a logic-issue message to a suggested fix by keyword.
     *
     * @param {string} issue
     * @returns {string}
     */
    getSuggestedFix(issue) {
        if (issue.includes('justification')) {
            return 'Add business justification for unusual metrics';
        }
        if (issue.includes('evidence')) {
            return 'Include supporting data and benchmarks';
        }
        if (issue.includes('mitigation')) {
            return 'Add risk mitigation strategies';
        }
        return 'Review and correct the identified issue';
    }
    /**
     * Collect the unique words longer than four characters from the headline,
     * key insight, primary insights and risks.
     *
     * @param {object} response
     * @returns {string[]}
     */
    extractKeyTerms(response) {
        const terms = [];
        // Extract from various fields
        const textFields = [
            response.executive_summary?.headline,
            response.executive_summary?.key_insight,
            ...(response.insights?.primary || []),
            ...(response.insights?.risks || [])
        ];
        textFields.forEach(text => {
            if (text) {
                // Simple term extraction (in reality would use NLP)
                const words = text.split(/\s+/).filter((w) => w.length > 4);
                terms.push(...words);
            }
        });
        return Array.from(new Set(terms));
    }
    /**
     * Heuristic: an insight counts as data-backed when it contains a digit or
     * mentions one of a few metric keywords.
     *
     * @param {string} insight
     * @param {object} [sourceData] - Currently unused.
     * @returns {boolean}
     */
    hasDataSupport(insight, sourceData) {
        // Check if insight references actual data
        const hasNumbers = /\d+/.test(insight);
        const referencesMetrics = ['roi', 'payback', 'cost', 'benefit', 'saving']
            .some(metric => insight.toLowerCase().includes(metric));
        return hasNumbers || referencesMetrics;
    }
}
// Export singleton instance
export const qualityAssurance = new QualityAssurance();
//# sourceMappingURL=quality-assurance.js.map