UNPKG

quality-check-mcp-server

Version:

MCP server for quality validation, scoring, and rerun strategy determination in GAFF

478 lines 20.1 kB
#!/usr/bin/env node import { Server } from '@modelcontextprotocol/sdk/server/index.js'; import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'; import { CallToolRequestSchema, ListToolsRequestSchema, } from '@modelcontextprotocol/sdk/types.js'; // Tool definitions const tools = [ { name: 'validate_execution_result', description: 'Validate the final result of an intent graph execution against quality criteria', inputSchema: { type: 'object', properties: { execution_result: { type: 'object', description: 'Result from router.execute_graph', }, quality_criteria: { type: 'object', properties: { completeness_required: { type: 'boolean' }, accuracy_threshold: { type: 'number' }, required_fields: { type: 'array', items: { type: 'string' }, }, }, }, intent_graph: { type: 'object', description: 'Original intent graph', }, original_request: { type: 'object', description: 'Original user request', }, }, required: ['execution_result', 'quality_criteria'], }, }, { name: 'score_quality', description: 'Calculate quality score for execution results with weighted component scores', inputSchema: { type: 'object', properties: { execution_result: { type: 'object', description: 'Execution result to score', }, scoring_criteria: { type: 'object', properties: { completeness_weight: { type: 'number' }, accuracy_weight: { type: 'number' }, performance_weight: { type: 'number' }, }, }, }, required: ['execution_result'], }, }, { name: 'check_completeness', description: 'Verify all required outputs are present and properly formatted', inputSchema: { type: 'object', properties: { execution_result: { type: 'object', description: 'Result to check', }, required_outputs: { type: 'object', properties: { required_fields: { type: 'array', items: { type: 'string' }, }, required_types: { type: 'object' }, required_formats: { type: 'object' }, }, }, }, required: ['execution_result', 'required_outputs'], }, }, { name: 'check_accuracy', description: 'Validate accuracy and correctness of results against rules', inputSchema: { type: 'object', properties: { execution_result: { type: 'object', description: 'Result to validate', }, accuracy_criteria: { type: 'object', properties: { validation_rules: { type: 'array' }, business_rules: { type: 'array' }, expected_ranges: { type: 'object' }, }, }, reference_data: { type: 'object', description: 'Optional reference for comparison', }, }, required: ['execution_result', 'accuracy_criteria'], }, }, { name: 'determine_rerun_strategy', description: 'Intelligently decide the best rerun strategy based on failure analysis', inputSchema: { type: 'object', properties: { execution_result: { type: 'object', description: 'Execution result', }, validation_result: { type: 'object', description: 'Result from validate_execution_result', }, intent_graph: { type: 'object', description: 'Original intent graph', }, failure_history: { type: 'array', description: 'Previous failures in this execution', }, }, required: ['execution_result', 'validation_result', 'intent_graph'], }, }, { name: 'analyze_failure_patterns', description: 'Identify patterns in failures to help improve workflows', inputSchema: { type: 'object', properties: { execution_history: { type: 'array', description: 'History of executions', }, intent_graph: { type: 'object', description: 'Intent graph to analyze', }, time_range: { type: 'object', properties: { start: { type: 'string' }, end: { type: 'string' }, }, }, }, required: ['execution_history', 'intent_graph'], }, }, ]; // Create MCP server const server = new Server({ name: 'quality-check-mcp-server', version: '1.0.0', }, { capabilities: { tools: {}, }, }); // List tools handler server.setRequestHandler(ListToolsRequestSchema, async () => { return { tools }; }); // Call tool handler server.setRequestHandler(CallToolRequestSchema, async (request) => { const { name, arguments: args } = request.params; switch (name) { case 'validate_execution_result': { const { execution_result, quality_criteria, intent_graph, original_request } = args; // TODO: Implement comprehensive validation logic // This should: // 1. Check completeness (all required fields present) // 2. Validate accuracy (values meet criteria) // 3. Check consistency // 4. Verify format correctness // 5. Calculate quality score // 6. Determine if rerun is needed const issues = []; const required_fields = quality_criteria.required_fields || []; const missing_fields = []; // Placeholder completeness check for (const field of required_fields) { if (!execution_result[field]) { missing_fields.push(field); issues.push({ type: 'missing_field', field, message: `Required field '${field}' is missing`, severity: 'error', }); } } const completeness_score = required_fields.length > 0 ? 1.0 - (missing_fields.length / required_fields.length) : 1.0; const accuracy_score = 1.0; // Placeholder const quality_score = (completeness_score * 0.4) + (accuracy_score * 0.4) + (1.0 * 0.2); const accuracy_threshold = quality_criteria.accuracy_threshold || 0.85; const is_acceptable = quality_score >= accuracy_threshold; const rerun_required = !is_acceptable; // Placeholder rerun nodes determination const rerun_nodes = []; if (rerun_required && intent_graph?.nodes) { // In real implementation, analyze which nodes failed rerun_nodes.push(...intent_graph.nodes.slice(-2).map((n) => n.id || n.node_id)); } return { content: [ { type: 'text', text: JSON.stringify({ is_valid: issues.filter((i) => i.severity === 'error').length === 0, quality_score, is_acceptable, issues, completeness_score, accuracy_score, rerun_required, rerun_nodes, recommendations: rerun_required ? ['Review failed nodes', 'Check input data quality', 'Verify agent configurations'] : ['Result meets quality standards'], timestamp: new Date().toISOString(), }, null, 2), }, ], }; } case 'score_quality': { const { execution_result, scoring_criteria } = args; // TODO: Implement sophisticated quality scoring // Use ML models or rule-based systems for accurate scoring const weights = { completeness: scoring_criteria?.completeness_weight || 0.4, accuracy: scoring_criteria?.accuracy_weight || 0.4, performance: scoring_criteria?.performance_weight || 0.2, }; // Placeholder scores const component_scores = { completeness: 0.95, accuracy: 0.90, performance: 0.85, custom: [], }; const overall_score = component_scores.completeness * weights.completeness + component_scores.accuracy * weights.accuracy + component_scores.performance * weights.performance; let grade; if (overall_score >= 0.95) grade = 'excellent'; else if (overall_score >= 0.85) grade = 'good'; else if (overall_score >= 0.75) grade = 'acceptable'; else if (overall_score >= 0.60) grade = 'poor'; else grade = 'failed'; const passing = overall_score >= 0.85; return { content: [ { type: 'text', text: JSON.stringify({ overall_score, component_scores, grade, passing, weights_used: weights, timestamp: new Date().toISOString(), }, null, 2), }, ], }; } case 'check_completeness': { const { execution_result, required_outputs } = args; // TODO: Implement comprehensive completeness checking const missing_fields = []; const type_mismatches = []; const format_violations = []; const required_fields = required_outputs.required_fields || []; for (const field of required_fields) { if (!execution_result[field]) { missing_fields.push(field); } } const completeness_score = required_fields.length > 0 ? 1.0 - (missing_fields.length / required_fields.length) : 1.0; const is_complete = missing_fields.length === 0 && type_mismatches.length === 0; return { content: [ { type: 'text', text: JSON.stringify({ is_complete, completeness_score, missing_fields, type_mismatches, format_violations, total_required: required_fields.length, total_present: required_fields.length - missing_fields.length, timestamp: new Date().toISOString(), }, null, 2), }, ], }; } case 'check_accuracy': { const { execution_result, accuracy_criteria, reference_data } = args; // TODO: Implement accuracy validation // Apply validation rules, business rules, range checks const rule_violations = []; // Placeholder validation const validation_rules = accuracy_criteria.validation_rules || []; // In real implementation, apply each rule const accuracy_score = 1.0 - (rule_violations.length * 0.15); const is_accurate = rule_violations.filter((v) => v.severity === 'error').length === 0; return { content: [ { type: 'text', text: JSON.stringify({ is_accurate, accuracy_score: Math.max(0, accuracy_score), rule_violations, confidence: is_accurate ? 0.95 : 0.65, rules_checked: validation_rules.length, timestamp: new Date().toISOString(), }, null, 2), }, ], }; } case 'determine_rerun_strategy': { const { execution_result, validation_result, intent_graph, failure_history } = args; // TODO: Implement intelligent rerun strategy algorithm // Analyze failure patterns, node dependencies, historical data const quality_score = validation_result.quality_score || 0; const rerun_required = validation_result.rerun_required || quality_score < 0.85; let strategy = 'none'; let rerun_nodes = []; let reasoning = 'Result meets quality standards'; if (rerun_required) { const failed_nodes = validation_result.rerun_nodes || []; const attempt_count = (failure_history || []).length; if (attempt_count >= 2) { strategy = 'full'; reasoning = 'Multiple partial reruns failed, attempting full rerun'; } else if (failed_nodes.length === 1) { strategy = 'partial'; rerun_nodes = failed_nodes; reasoning = 'Single node failure detected, rerunning failed node and dependencies'; } else if (failed_nodes.length > 1 && failed_nodes.length < 5) { strategy = 'partial'; rerun_nodes = failed_nodes; reasoning = 'Multiple independent failures, rerunning affected nodes'; } else { strategy = 'adaptive'; rerun_nodes = failed_nodes; reasoning = 'Complex failure pattern, using adaptive strategy'; } } const estimated_success_probability = strategy === 'full' ? 0.7 : strategy === 'partial' ? 0.8 : strategy === 'adaptive' ? 0.75 : 1.0; return { content: [ { type: 'text', text: JSON.stringify({ rerun_required, strategy, rerun_nodes, estimated_success_probability, reasoning, max_attempts_recommendation: 3, alternative_approaches: strategy === 'adaptive' ? ['Try different agent', 'Modify input parameters', 'Review workflow design'] : [], timestamp: new Date().toISOString(), }, null, 2), }, ], }; } case 'analyze_failure_patterns': { const { execution_history, intent_graph, time_range } = args; // TODO: Implement failure pattern analysis // Use statistical analysis, ML clustering, or rule-based detection const patterns = []; const node_failures = new Map(); // Analyze execution history for (const execution of execution_history) { if (execution.failed_nodes) { for (const node_id of execution.failed_nodes) { node_failures.set(node_id, (node_failures.get(node_id) || 0) + 1); } } } // Identify patterns for (const [node_id, count] of node_failures.entries()) { if (count >= 3) { patterns.push({ pattern_type: 'node_failure', frequency: count, affected_nodes: [node_id], root_cause_hypothesis: `Node ${node_id} consistently fails`, recommendation: `Review agent configuration for ${node_id}`, }); } } const total_executions = execution_history.length; const successful_executions = execution_history.filter((e) => e.success).length; const success_rate = total_executions > 0 ? successful_executions / total_executions : 0; const quality_scores = execution_history .map((e) => e.quality_score) .filter((s) => typeof s === 'number'); const average_quality_score = quality_scores.length > 0 ? quality_scores.reduce((a, b) => a + b, 0) / quality_scores.length : 0; return { content: [ { type: 'text', text: JSON.stringify({ patterns, most_common_failures: Array.from(node_failures.entries()) .sort((a, b) => b[1] - a[1]) .slice(0, 5) .map(([node_id, count]) => ({ node_id, failure_count: count })), success_rate, average_quality_score, total_executions, improvement_suggestions: [ success_rate < 0.8 ? 'Consider workflow redesign' : null, average_quality_score < 0.85 ? 'Review quality criteria' : null, patterns.length > 0 ? 'Address recurring failure patterns' : null, ].filter(Boolean), timestamp: new Date().toISOString(), }, null, 2), }, ], }; } default: throw new Error(`Unknown tool: ${name}`); } }); // Start server async function main() { const transport = new StdioServerTransport(); await server.connect(transport); console.error('Quality Check MCP Server running on stdio'); } main().catch((error) => { console.error('Fatal error:', error); process.exit(1); }); //# sourceMappingURL=index.js.map