quality-check-mcp-server
Version:
MCP server for quality validation, scoring, and rerun strategy determination in GAFF
478 lines • 20.1 kB
JavaScript
#!/usr/bin/env node
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import { CallToolRequestSchema, ListToolsRequestSchema, } from '@modelcontextprotocol/sdk/types.js';
// Tool definitions
//
// The catalogue below is what the server advertises in response to a
// ListTools request. Small builders keep each entry to the parts that
// actually differ between tools.

/** Schema fragment for a value of `type`, with an optional human-readable description. */
const typedField = (type, description) => (description === undefined ? { type } : { type, description });

/** Schema fragment for an object whose members are spelled out. */
const nestedObject = (properties) => ({ type: 'object', properties });

/** Schema fragment for an array of strings. */
const stringList = () => ({ type: 'array', items: { type: 'string' } });

/** Assemble one tool entry; `required` lists the mandatory argument names. */
const defineTool = (name, description, properties, required) => ({
    name,
    description,
    inputSchema: { type: 'object', properties, required },
});

const tools = [
    defineTool(
        'validate_execution_result',
        'Validate the final result of an intent graph execution against quality criteria',
        {
            execution_result: typedField('object', 'Result from router.execute_graph'),
            quality_criteria: nestedObject({
                completeness_required: typedField('boolean'),
                accuracy_threshold: typedField('number'),
                required_fields: stringList(),
            }),
            intent_graph: typedField('object', 'Original intent graph'),
            original_request: typedField('object', 'Original user request'),
        },
        ['execution_result', 'quality_criteria'],
    ),
    defineTool(
        'score_quality',
        'Calculate quality score for execution results with weighted component scores',
        {
            execution_result: typedField('object', 'Execution result to score'),
            scoring_criteria: nestedObject({
                completeness_weight: typedField('number'),
                accuracy_weight: typedField('number'),
                performance_weight: typedField('number'),
            }),
        },
        ['execution_result'],
    ),
    defineTool(
        'check_completeness',
        'Verify all required outputs are present and properly formatted',
        {
            execution_result: typedField('object', 'Result to check'),
            required_outputs: nestedObject({
                required_fields: stringList(),
                required_types: typedField('object'),
                required_formats: typedField('object'),
            }),
        },
        ['execution_result', 'required_outputs'],
    ),
    defineTool(
        'check_accuracy',
        'Validate accuracy and correctness of results against rules',
        {
            execution_result: typedField('object', 'Result to validate'),
            accuracy_criteria: nestedObject({
                validation_rules: typedField('array'),
                business_rules: typedField('array'),
                expected_ranges: typedField('object'),
            }),
            reference_data: typedField('object', 'Optional reference for comparison'),
        },
        ['execution_result', 'accuracy_criteria'],
    ),
    defineTool(
        'determine_rerun_strategy',
        'Intelligently decide the best rerun strategy based on failure analysis',
        {
            execution_result: typedField('object', 'Execution result'),
            validation_result: typedField('object', 'Result from validate_execution_result'),
            intent_graph: typedField('object', 'Original intent graph'),
            failure_history: typedField('array', 'Previous failures in this execution'),
        },
        ['execution_result', 'validation_result', 'intent_graph'],
    ),
    defineTool(
        'analyze_failure_patterns',
        'Identify patterns in failures to help improve workflows',
        {
            execution_history: typedField('array', 'History of executions'),
            intent_graph: typedField('object', 'Intent graph to analyze'),
            time_range: nestedObject({
                start: typedField('string'),
                end: typedField('string'),
            }),
        },
        ['execution_history', 'intent_graph'],
    ),
];
// Create MCP server
// Identity reported to clients, and the capability set advertised to them
// (only tools are declared).
const serverInfo = { name: 'quality-check-mcp-server', version: '1.0.0' };
const serverOptions = { capabilities: { tools: {} } };
const server = new Server(serverInfo, serverOptions);
// List tools handler: replies with the static catalogue declared in `tools`.
server.setRequestHandler(ListToolsRequestSchema, async () => ({ tools }));
// Call tool handler
//
// Each tool is implemented by its own helper below; the request handler at
// the end of this section only dispatches on the tool name.

/** Score at or above which a result counts as passing when no threshold is supplied. */
const DEFAULT_QUALITY_THRESHOLD = 0.85;

/**
 * Wrap a result payload in the text-content envelope returned by every tool.
 * A `timestamp` (ISO-8601, taken at serialization time) is appended as the
 * last key of the pretty-printed JSON.
 *
 * @param {object} payload - Tool-specific result fields.
 * @returns {{content: Array<{type: string, text: string}>}}
 */
function toTextResponse(payload) {
    return {
        content: [
            {
                type: 'text',
                text: JSON.stringify({ ...payload, timestamp: new Date().toISOString() }, null, 2),
            },
        ],
    };
}

/**
 * Fetch an argument the tool cannot work without.
 *
 * @param {string} toolName - Tool being invoked (used in the error message).
 * @param {object} args - Arguments object of the call.
 * @param {string} key - Name of the mandatory argument.
 * @returns {*} The argument value.
 * @throws {Error} When the argument is `null` or `undefined`, instead of the
 *   opaque TypeError a later property access would raise.
 */
function requireArgument(toolName, args, key) {
    const value = args[key];
    if (value == null) {
        throw new Error(`Tool '${toolName}' requires argument '${key}'`);
    }
    return value;
}

/**
 * List the required fields that are absent from a result.
 * A field counts as absent only when its value is `null`/`undefined` (or the
 * result itself is); falsy-but-real values such as `0`, `false` and `''`
 * count as present.
 *
 * @param {*} execution_result - Result object to inspect.
 * @param {Iterable<string>} required_fields - Field names that must be present.
 * @returns {string[]} Names of the absent fields, in input order.
 */
function findMissingFields(execution_result, required_fields) {
    const missing = [];
    for (const field of required_fields) {
        if (execution_result?.[field] == null) {
            missing.push(field);
        }
    }
    return missing;
}

/**
 * Fraction of required fields that are present; 1.0 when nothing is required.
 *
 * @param {number} total_required - Number of required fields.
 * @param {number} total_missing - Number of those that are absent.
 * @returns {number}
 */
function completenessRatio(total_required, total_missing) {
    return total_required > 0 ? 1.0 - (total_missing / total_required) : 1.0;
}

/**
 * Tool `validate_execution_result`: checks required fields and derives a
 * quality score plus a rerun verdict.
 *
 * @param {object} args - Tool arguments; `quality_criteria` is mandatory.
 * @returns {object} Text-content response.
 * @throws {Error} When `quality_criteria` is missing.
 */
function validateExecutionResult(args) {
    const quality_criteria = requireArgument('validate_execution_result', args, 'quality_criteria');
    const { execution_result, intent_graph } = args;
    // TODO: Implement comprehensive validation logic
    // This should:
    // 1. Check completeness (all required fields present)
    // 2. Validate accuracy (values meet criteria)
    // 3. Check consistency
    // 4. Verify format correctness
    // 5. Calculate quality score
    // 6. Determine if rerun is needed
    // (`original_request` is accepted by the schema but not used yet.)
    const required_fields = quality_criteria.required_fields || [];
    // Placeholder completeness check
    const missing_fields = findMissingFields(execution_result, required_fields);
    const issues = missing_fields.map((field) => ({
        type: 'missing_field',
        field,
        message: `Required field '${field}' is missing`,
        severity: 'error',
    }));
    const completeness_score = completenessRatio(required_fields.length, missing_fields.length);
    const accuracy_score = 1.0; // Placeholder
    // Fixed 40/40/20 blend; the last term is a placeholder performance score of 1.0.
    const quality_score = (completeness_score * 0.4) + (accuracy_score * 0.4) + (1.0 * 0.2);
    // `??` so that an explicit threshold of 0 is honoured rather than replaced.
    const accuracy_threshold = quality_criteria.accuracy_threshold ?? DEFAULT_QUALITY_THRESHOLD;
    const is_acceptable = quality_score >= accuracy_threshold;
    const rerun_required = !is_acceptable;
    // Placeholder rerun nodes determination
    const rerun_nodes = [];
    if (rerun_required && intent_graph?.nodes) {
        // In real implementation, analyze which nodes failed; for now the
        // last two nodes of the graph are proposed.
        rerun_nodes.push(...intent_graph.nodes.slice(-2).map((n) => n.id || n.node_id));
    }
    return toTextResponse({
        is_valid: !issues.some((i) => i.severity === 'error'),
        quality_score,
        is_acceptable,
        issues,
        completeness_score,
        accuracy_score,
        rerun_required,
        rerun_nodes,
        recommendations: rerun_required
            ? ['Review failed nodes', 'Check input data quality', 'Verify agent configurations']
            : ['Result meets quality standards'],
    });
}

/**
 * Tool `score_quality`: combines (placeholder) component scores using the
 * caller's weights and maps the result to a grade.
 *
 * @param {object} args - Tool arguments; `scoring_criteria` is optional.
 * @returns {object} Text-content response.
 */
function scoreQuality(args) {
    const { scoring_criteria } = args;
    // TODO: Implement sophisticated quality scoring
    // Use ML models or rule-based systems for accurate scoring
    // (`execution_result` is not inspected yet; the component scores are fixed.)
    // `??` so that a weight of 0 (component switched off) is respected.
    const weights = {
        completeness: scoring_criteria?.completeness_weight ?? 0.4,
        accuracy: scoring_criteria?.accuracy_weight ?? 0.4,
        performance: scoring_criteria?.performance_weight ?? 0.2,
    };
    // Placeholder scores
    const component_scores = {
        completeness: 0.95,
        accuracy: 0.90,
        performance: 0.85,
        custom: [],
    };
    const overall_score = component_scores.completeness * weights.completeness +
        component_scores.accuracy * weights.accuracy +
        component_scores.performance * weights.performance;
    let grade;
    if (overall_score >= 0.95) {
        grade = 'excellent';
    }
    else if (overall_score >= 0.85) {
        grade = 'good';
    }
    else if (overall_score >= 0.75) {
        grade = 'acceptable';
    }
    else if (overall_score >= 0.60) {
        grade = 'poor';
    }
    else {
        grade = 'failed';
    }
    const passing = overall_score >= DEFAULT_QUALITY_THRESHOLD;
    return toTextResponse({
        overall_score,
        component_scores,
        grade,
        passing,
        weights_used: weights,
    });
}

/**
 * Tool `check_completeness`: reports which required fields are absent.
 *
 * @param {object} args - Tool arguments; `required_outputs` is mandatory.
 * @returns {object} Text-content response.
 * @throws {Error} When `required_outputs` is missing.
 */
function checkCompleteness(args) {
    const required_outputs = requireArgument('check_completeness', args, 'required_outputs');
    const { execution_result } = args;
    // TODO: Implement comprehensive completeness checking.
    // `required_types` / `required_formats` are not evaluated yet, so the two
    // lists below are always empty.
    const type_mismatches = [];
    const format_violations = [];
    const required_fields = required_outputs.required_fields || [];
    const missing_fields = findMissingFields(execution_result, required_fields);
    const completeness_score = completenessRatio(required_fields.length, missing_fields.length);
    const is_complete = missing_fields.length === 0 && type_mismatches.length === 0;
    return toTextResponse({
        is_complete,
        completeness_score,
        missing_fields,
        type_mismatches,
        format_violations,
        total_required: required_fields.length,
        total_present: required_fields.length - missing_fields.length,
    });
}

/**
 * Tool `check_accuracy`: placeholder that reports no violations and counts
 * the supplied validation rules.
 *
 * @param {object} args - Tool arguments; `accuracy_criteria` is mandatory.
 * @returns {object} Text-content response.
 * @throws {Error} When `accuracy_criteria` is missing.
 */
function checkAccuracy(args) {
    const accuracy_criteria = requireArgument('check_accuracy', args, 'accuracy_criteria');
    // TODO: Implement accuracy validation
    // Apply validation rules, business rules, range checks
    // (`execution_result` and `reference_data` are not used yet.)
    const rule_violations = [];
    // Placeholder validation
    const validation_rules = accuracy_criteria.validation_rules || [];
    // In real implementation, apply each rule
    // Each violation costs 0.15; the reported score is floored at 0.
    const accuracy_score = 1.0 - (rule_violations.length * 0.15);
    const is_accurate = !rule_violations.some((v) => v.severity === 'error');
    return toTextResponse({
        is_accurate,
        accuracy_score: Math.max(0, accuracy_score),
        rule_violations,
        confidence: is_accurate ? 0.95 : 0.65,
        rules_checked: validation_rules.length,
    });
}

/**
 * Tool `determine_rerun_strategy`: picks 'none', 'partial', 'full' or
 * 'adaptive' from the validation verdict, the number of failed nodes and the
 * number of earlier failures.
 *
 * @param {object} args - Tool arguments; `validation_result` is mandatory.
 * @returns {object} Text-content response.
 * @throws {Error} When `validation_result` is missing.
 */
function determineRerunStrategy(args) {
    const validation_result = requireArgument('determine_rerun_strategy', args, 'validation_result');
    const { failure_history } = args;
    // TODO: Implement intelligent rerun strategy algorithm
    // Analyze failure patterns, node dependencies, historical data
    // (`execution_result` and `intent_graph` are not used yet.)
    const quality_score = validation_result.quality_score || 0;
    const rerun_required = validation_result.rerun_required || quality_score < DEFAULT_QUALITY_THRESHOLD;
    let strategy = 'none';
    let rerun_nodes = [];
    let reasoning = 'Result meets quality standards';
    if (rerun_required) {
        const failed_nodes = validation_result.rerun_nodes || [];
        const attempt_count = (failure_history || []).length;
        if (attempt_count >= 2) {
            // Two or more earlier failures: stop targeting individual nodes.
            strategy = 'full';
            reasoning = 'Multiple partial reruns failed, attempting full rerun';
        }
        else if (failed_nodes.length === 1) {
            strategy = 'partial';
            rerun_nodes = failed_nodes;
            reasoning = 'Single node failure detected, rerunning failed node and dependencies';
        }
        else if (failed_nodes.length > 1 && failed_nodes.length < 5) {
            strategy = 'partial';
            rerun_nodes = failed_nodes;
            reasoning = 'Multiple independent failures, rerunning affected nodes';
        }
        else {
            // Reached with zero known failed nodes or with five or more.
            strategy = 'adaptive';
            rerun_nodes = failed_nodes;
            reasoning = 'Complex failure pattern, using adaptive strategy';
        }
    }
    const estimated_success_probability = strategy === 'full' ? 0.7 :
        strategy === 'partial' ? 0.8 :
            strategy === 'adaptive' ? 0.75 : 1.0;
    return toTextResponse({
        rerun_required,
        strategy,
        rerun_nodes,
        estimated_success_probability,
        reasoning,
        max_attempts_recommendation: 3,
        alternative_approaches: strategy === 'adaptive'
            ? ['Try different agent', 'Modify input parameters', 'Review workflow design']
            : [],
    });
}

/**
 * Tool `analyze_failure_patterns`: counts per-node failures across the
 * history and derives success rate, average quality score and suggestions.
 *
 * @param {object} args - Tool arguments; `execution_history` must be an array.
 * @returns {object} Text-content response.
 * @throws {Error} When `execution_history` is not an array.
 */
function analyzeFailurePatterns(args) {
    const { execution_history } = args;
    if (!Array.isArray(execution_history)) {
        throw new Error("Tool 'analyze_failure_patterns' requires 'execution_history' to be an array");
    }
    // TODO: Implement failure pattern analysis
    // Use statistical analysis, ML clustering, or rule-based detection
    // (`intent_graph` and `time_range` are not used yet.)
    const node_failures = new Map();
    // Analyze execution history; entries without a failed_nodes array are skipped.
    for (const execution of execution_history) {
        const failed_nodes = execution?.failed_nodes;
        if (!Array.isArray(failed_nodes)) {
            continue;
        }
        for (const node_id of failed_nodes) {
            node_failures.set(node_id, (node_failures.get(node_id) || 0) + 1);
        }
    }
    // Identify patterns: a node that failed three or more times is flagged.
    const patterns = [];
    for (const [node_id, count] of node_failures) {
        if (count >= 3) {
            patterns.push({
                pattern_type: 'node_failure',
                frequency: count,
                affected_nodes: [node_id],
                root_cause_hypothesis: `Node ${node_id} consistently fails`,
                recommendation: `Review agent configuration for ${node_id}`,
            });
        }
    }
    const total_executions = execution_history.length;
    const successful_executions = execution_history.filter((e) => e?.success).length;
    const success_rate = total_executions > 0 ? successful_executions / total_executions : 0;
    const quality_scores = execution_history
        .map((e) => e?.quality_score)
        .filter((s) => typeof s === 'number');
    const average_quality_score = quality_scores.length > 0
        ? quality_scores.reduce((a, b) => a + b, 0) / quality_scores.length
        : 0;
    // The 0 fallbacks above mean "no data", not "bad result": only derive a
    // suggestion from a metric when there was something to measure.
    const improvement_suggestions = [];
    if (total_executions > 0 && success_rate < 0.8) {
        improvement_suggestions.push('Consider workflow redesign');
    }
    if (quality_scores.length > 0 && average_quality_score < DEFAULT_QUALITY_THRESHOLD) {
        improvement_suggestions.push('Review quality criteria');
    }
    if (patterns.length > 0) {
        improvement_suggestions.push('Address recurring failure patterns');
    }
    return toTextResponse({
        patterns,
        most_common_failures: Array.from(node_failures.entries())
            .sort((a, b) => b[1] - a[1])
            .slice(0, 5)
            .map(([node_id, count]) => ({ node_id, failure_count: count })),
        success_rate,
        average_quality_score,
        total_executions,
        improvement_suggestions,
    });
}

// Dispatch a CallTool request to the matching tool implementation.
// Throws for an unrecognized tool name.
server.setRequestHandler(CallToolRequestSchema, async (request) => {
    const { name } = request.params;
    // A call may omit `arguments` entirely; treat that as an empty object so
    // each tool can report precisely which argument it is missing.
    const args = request.params.arguments ?? {};
    switch (name) {
        case 'validate_execution_result':
            return validateExecutionResult(args);
        case 'score_quality':
            return scoreQuality(args);
        case 'check_completeness':
            return checkCompleteness(args);
        case 'check_accuracy':
            return checkAccuracy(args);
        case 'determine_rerun_strategy':
            return determineRerunStrategy(args);
        case 'analyze_failure_patterns':
            return analyzeFailurePatterns(args);
        default:
            throw new Error(`Unknown tool: ${name}`);
    }
});
// Start server
/**
 * Attach the server to a stdio transport and announce readiness.
 * The notice is written with console.error (stderr).
 */
async function main() {
    await server.connect(new StdioServerTransport());
    console.error('Quality Check MCP Server running on stdio');
}

// A failure during startup is logged and ends the process with exit code 1.
main().catch((startupError) => {
    console.error('Fatal error:', startupError);
    process.exit(1);
});
//# sourceMappingURL=index.js.map