mcp-workflow-server-enhanced

Version:

Enhanced MCP Workflow Server with smart problem routing, comprehensive validation, guide compliance, and robust error handling. Intelligently routes to appropriate AI functions based on problem type.

github.com/zrald/mcp-workflow-server-enhanced

zrald/mcp-workflow-server-enhanced

514 lines (462 loc) • 15.2 kB

text/typescript

import { WorkflowContext, GuideCompliance, FunctionOutput } from './types.js';
import { validateGuideCompliance } from './validation.js';
import { globalErrorHandler } from './error-handling.js';

/**
 * Incremental Testing Framework for MCP Workflow Server
 *
 * This framework provides comprehensive testing infrastructure for validating
 * problem-solving accuracy, guide compliance, and workflow functionality.
 */

/** Points earned when the function under test reports `success`. */
const EXECUTION_POINTS = 30;

/** Points earned when the actual output matches `expectedOutput`. */
const OUTPUT_POINTS = 30;

/** Multiplier applied to the compliance score when converting it to points. */
const COMPLIANCE_WEIGHT = 0.4;

/**
 * Points earned for a compliance score of 100.
 * NOTE(review): assumes `complianceScore` is on a 0-100 scale (it is printed
 * with a `%` suffix below) — confirm against `validateGuideCompliance`.
 */
const COMPLIANCE_POINTS = 40;

/** Minimum normalised score (0-100) a test needs in order to pass. */
const PASSING_SCORE = 70;

/** Compliance scores below this value produce a warning / recommendation. */
const LOW_COMPLIANCE_THRESHOLD = 70;

/** Tests running longer than this many milliseconds are reported as slow. */
const SLOW_TEST_THRESHOLD_MS = 5000;

/** A single test to run against one workflow step handler. */
export interface TestCase {
  id: string;
  name: string;
  description: string;
  /** Value passed as the first argument to the step handler. */
  input: any;
  /**
   * Optional pattern the handler's `result` must match. Only properties
   * present here are checked; extra properties on the actual result are
   * ignored.
   */
  expectedOutput?: any;
  /** Optional compliance expectations; unmet expectations produce warnings. */
  expectedCompliance?: Partial<GuideCompliance>;
  /** Key used to look up the handler and passed to the validators. */
  stepName: string;
  category: 'unit' | 'integration' | 'compliance' | 'domain-specific';
  priority: 'low' | 'medium' | 'high' | 'critical';
}

/** Outcome of running one {@link TestCase}. */
export interface TestResult {
  testId: string;
  passed: boolean;
  /** Normalised score, 0-100. */
  score: number;
  errors: string[];
  warnings: string[];
  actualOutput?: any;
  complianceResult?: GuideCompliance;
  /** Wall-clock duration in milliseconds. */
  executionTime: number;
  metadata: Record<string, any>;
}

/** A named group of test cases with optional setup/teardown hooks. */
export interface TestSuite {
  name: string;
  description: string;
  tests: TestCase[];
  setup?: () => Promise<void>;
  teardown?: () => Promise<void>;
}

/** Narrowing helper: true for any non-null value whose `typeof` is `'object'`. */
function isObjectLike(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null;
}

/** Strict equality that additionally treats `NaN` as equal to `NaN`. */
function isSameValue(a: unknown, b: unknown): boolean {
  if (a === b) {
    return true;
  }
  return typeof a === 'number' && typeof b === 'number' && Number.isNaN(a) && Number.isNaN(b);
}

/**
 * Render a value for a diagnostic message. Objects are serialised as JSON so
 * messages do not degrade to `[object Object]`; primitives use `String()`.
 */
function describeValue(value: unknown): string {
  if (isObjectLike(value)) {
    try {
      return JSON.stringify(value);
    } catch {
      // Circular structures (or throwing toJSON) cannot be serialised.
      return Object.prototype.toString.call(value);
    }
  }
  return String(value);
}

/**
 * Test runner for incremental validation
 */
export class IncrementalTestRunner {
  private testSuites: Map<string, TestSuite> = new Map();
  private results: Map<string, TestResult[]> = new Map();

  /**
   * Register a test suite under `name`, replacing any suite already
   * registered with the same name.
   */
  registerTestSuite(name: string, suite: TestSuite): void {
    this.testSuites.set(name, suite);
  }

  /**
   * Run a specific test case.
   *
   * Scoring: up to 30 points for successful execution, up to 30 for matching
   * `expectedOutput` (only when one is provided) and up to 40 for guide
   * compliance. The earned points are normalised against the points that were
   * actually achievable for this test, so a test without `expectedOutput` is
   * not capped at 70 and can reach the pass mark with a realistic compliance
   * score. For tests that do provide `expectedOutput` the score is unchanged.
   *
   * @param testCase - The test to run.
   * @param functionHandler - Handler for `testCase.stepName`.
   * @param context - Workflow context forwarded to the handler and validator.
   * @returns The result; never rejects — thrown errors are recorded in
   *   `errors` with a score of 0.
   */
  async runTest(
    testCase: TestCase,
    functionHandler: (input: any, context: WorkflowContext) => Promise<FunctionOutput>,
    context: WorkflowContext
  ): Promise<TestResult> {
    const startTime = Date.now();
    const errors: string[] = [];
    const warnings: string[] = [];
    let passed = false;
    let score = 0;
    let actualOutput: any;
    let complianceResult: GuideCompliance | undefined;

    try {
      // Validate input. Invalid input is recorded but the handler still runs.
      const inputValidation = globalErrorHandler.validateInput(testCase.input, testCase.stepName);
      if (!inputValidation.valid) {
        errors.push(...inputValidation.errors);
      }

      // Execute the function
      actualOutput = await functionHandler(testCase.input, context);

      let earned = 0;
      let achievable = EXECUTION_POINTS + COMPLIANCE_POINTS;

      // Check if function succeeded
      if (!actualOutput.success) {
        errors.push(`Function execution failed: ${actualOutput.result?.error || 'Unknown error'}`);
      } else {
        earned += EXECUTION_POINTS;
      }

      // Validate expected output if provided. Compare against `undefined`
      // rather than truthiness so expectations such as 0, '' or false count.
      if (testCase.expectedOutput !== undefined) {
        achievable += OUTPUT_POINTS;
        const outputMatch = this.compareOutputs(actualOutput.result, testCase.expectedOutput);
        if (outputMatch.matches) {
          earned += OUTPUT_POINTS;
        } else {
          errors.push(`Output mismatch: ${outputMatch.differences.join(', ')}`);
        }
      }

      // Check guide compliance (only meaningful for successful executions)
      if (actualOutput.success) {
        complianceResult = await validateGuideCompliance(
          testCase.stepName,
          testCase.input,
          actualOutput,
          context
        );

        // Score based on compliance
        earned += Math.round(complianceResult.complianceScore * COMPLIANCE_WEIGHT);

        if (complianceResult.complianceScore < LOW_COMPLIANCE_THRESHOLD) {
          warnings.push(`Low guide compliance: ${complianceResult.complianceScore}%`);
        }

        // Check expected compliance if provided; shortfalls are warnings only
        if (testCase.expectedCompliance) {
          const complianceMatch = this.checkComplianceExpectations(
            complianceResult,
            testCase.expectedCompliance
          );
          if (!complianceMatch.matches) {
            warnings.push(...complianceMatch.issues);
          }
        }
      }

      // Normalise to 0-100. Multiply before dividing to keep the arithmetic
      // exact for the common `achievable === 100` case.
      score = Math.round((earned * 100) / achievable);

      // Determine if test passed
      passed = errors.length === 0 && score >= PASSING_SCORE;
    } catch (error) {
      // The caught value may be anything; narrow before reading `.message`.
      const message = error instanceof Error ? error.message : String(error);
      errors.push(`Test execution error: ${message}`);
      score = 0;
    }

    const executionTime = Date.now() - startTime;

    return {
      testId: testCase.id,
      passed,
      score,
      errors,
      warnings,
      actualOutput,
      complianceResult,
      executionTime,
      metadata: {
        category: testCase.category,
        priority: testCase.priority,
        stepName: testCase.stepName,
      },
    };
  }

  /**
   * Run all tests in a suite, sequentially and in declaration order.
   *
   * @param suiteName - Name the suite was registered under.
   * @param functionHandlers - Handlers keyed by step name. A test whose step
   *   has no handler is recorded as failed rather than aborting the run.
   * @param context - Workflow context forwarded to every test.
   * @returns Results in the same order as the suite's tests.
   * @throws Error if no suite is registered under `suiteName`; also
   *   propagates anything thrown by the suite's `setup` or `teardown`.
   */
  async runTestSuite(
    suiteName: string,
    functionHandlers: Map<string, (input: any, context: WorkflowContext) => Promise<FunctionOutput>>,
    context: WorkflowContext
  ): Promise<TestResult[]> {
    const suite = this.testSuites.get(suiteName);
    if (!suite) {
      throw new Error(`Test suite not found: ${suiteName}`);
    }

    const results: TestResult[] = [];

    // Run setup if provided. Deliberately outside the try block: teardown is
    // not attempted when setup itself fails.
    if (suite.setup) {
      await suite.setup();
    }

    try {
      for (const testCase of suite.tests) {
        const handler = functionHandlers.get(testCase.stepName);
        if (!handler) {
          results.push({
            testId: testCase.id,
            passed: false,
            score: 0,
            errors: [`No handler found for step: ${testCase.stepName}`],
            warnings: [],
            executionTime: 0,
            metadata: {
              category: testCase.category,
              priority: testCase.priority,
              stepName: testCase.stepName,
            },
          });
          continue;
        }

        results.push(await this.runTest(testCase, handler, context));
      }
    } finally {
      // Record results before teardown so a throwing teardown cannot
      // discard them from later reports.
      this.results.set(suiteName, results);

      // Run teardown if provided
      if (suite.teardown) {
        await suite.teardown();
      }
    }

    return results;
  }

  /**
   * Generate test report.
   *
   * @param suiteName - When given, report only on that suite's most recent
   *   run (an unknown or never-run suite yields an empty report); otherwise
   *   aggregate the most recent run of every suite.
   * @returns Summary statistics, the underlying results and recommendations.
   *   Averages are 0 when there are no results.
   */
  generateReport(suiteName?: string): {
    summary: {
      totalTests: number;
      passed: number;
      failed: number;
      averageScore: number;
      averageExecutionTime: number;
    };
    details: TestResult[];
    recommendations: string[];
  } {
    let allResults: TestResult[] = [];

    if (suiteName) {
      allResults = this.results.get(suiteName) || [];
    } else {
      for (const results of this.results.values()) {
        allResults.push(...results);
      }
    }

    const totalTests = allResults.length;
    const passed = allResults.filter(r => r.passed).length;
    const failed = totalTests - passed;
    const averageScore =
      totalTests > 0 ? allResults.reduce((sum, r) => sum + r.score, 0) / totalTests : 0;
    const averageExecutionTime =
      totalTests > 0 ? allResults.reduce((sum, r) => sum + r.executionTime, 0) / totalTests : 0;

    const recommendations = this.generateRecommendations(allResults);

    return {
      summary: {
        totalTests,
        passed,
        failed,
        averageScore,
        averageExecutionTime,
      },
      details: allResults,
      recommendations,
    };
  }

  /**
   * Compare actual vs expected outputs.
   *
   * `expected` is treated as a pattern: every property (or array index) it
   * defines must be present in `actual` with a matching value, recursively.
   * Properties that exist only on `actual` are ignored. `Date` values are
   * compared by timestamp. Objects that keep their state outside own
   * enumerable properties (e.g. `Map`, `Set`) are not inspected deeply.
   *
   * @returns `matches` plus one human-readable entry per difference found.
   */
  private compareOutputs(actual: any, expected: any): {
    matches: boolean;
    differences: string[];
  } {
    const differences: string[] = [];
    this.collectDifferences(actual, expected, '', differences, new WeakSet<object>());

    return {
      matches: differences.length === 0,
      differences,
    };
  }

  /**
   * Recursive worker for {@link compareOutputs}.
   *
   * @param path - Dotted property path of the values being compared; empty
   *   for the root.
   * @param differences - Accumulator that receives difference messages.
   * @param visiting - Expected-side objects on the current recursion path,
   *   used to stop on circular expectations.
   */
  private collectDifferences(
    actual: unknown,
    expected: unknown,
    path: string,
    differences: string[],
    visiting: WeakSet<object>
  ): void {
    const atRoot = path === '';

    if (typeof actual !== typeof expected) {
      differences.push(
        atRoot
          ? `Type mismatch: expected ${typeof expected}, got ${typeof actual}`
          : `Property ${path}: type mismatch: expected ${typeof expected}, got ${typeof actual}`
      );
      return;
    }

    const reportValueMismatch = (): void => {
      differences.push(
        atRoot
          ? `Value mismatch: expected ${describeValue(expected)}, got ${describeValue(actual)}`
          : `Property ${path}: expected ${describeValue(expected)}, got ${describeValue(actual)}`
      );
    };

    // Dates have no own enumerable keys, so compare them by timestamp.
    if (expected instanceof Date || actual instanceof Date) {
      const sameInstant =
        expected instanceof Date &&
        actual instanceof Date &&
        expected.getTime() === actual.getTime();
      if (!sameInstant) {
        reportValueMismatch();
      }
      return;
    }

    if (isObjectLike(actual) && isObjectLike(expected)) {
      // An array can never satisfy an object pattern, or vice versa.
      if (Array.isArray(actual) !== Array.isArray(expected)) {
        reportValueMismatch();
        return;
      }

      // Circular expectation: this object is already being compared higher
      // up the stack, so descending again would never terminate.
      if (visiting.has(expected)) {
        return;
      }
      visiting.add(expected);

      for (const key of Object.keys(expected)) {
        const childPath = atRoot ? key : `${path}.${key}`;
        if (!(key in actual)) {
          differences.push(`Missing property: ${childPath}`);
        } else {
          this.collectDifferences(actual[key], expected[key], childPath, differences, visiting);
        }
      }

      visiting.delete(expected);
      return;
    }

    // Primitives, functions, or null on either side.
    if (!isSameValue(actual, expected)) {
      reportValueMismatch();
    }
  }

  /**
   * Check compliance expectations.
   *
   * Verifies that the actual compliance score is at least the expected one
   * and that every expected maxim was applied.
   *
   * NOTE(review): one predefined test declares `heuristicsApplied` in its
   * expectations, but that field is not checked here — confirm whether
   * `GuideCompliance` exposes it and whether it should be validated.
   */
  private checkComplianceExpectations(
    actual: GuideCompliance,
    expected: Partial<GuideCompliance>
  ): { matches: boolean; issues: string[] } {
    const issues: string[] = [];

    if (expected.complianceScore !== undefined) {
      if (actual.complianceScore < expected.complianceScore) {
        issues.push(
          `Compliance score below expected: ${actual.complianceScore} < ${expected.complianceScore}`
        );
      }
    }

    if (expected.maximsApplied) {
      for (const maxim of expected.maximsApplied) {
        if (!actual.maximsApplied.includes(maxim)) {
          issues.push(`Expected maxim not applied: ${maxim}`);
        }
      }
    }

    return {
      matches: issues.length === 0,
      issues,
    };
  }

  /**
   * Generate recommendations based on test results: one entry each for
   * failed, low-scoring, slow and low-compliance tests, when any exist.
   */
  private generateRecommendations(results: TestResult[]): string[] {
    const recommendations: string[] = [];

    const failedTests = results.filter(r => !r.passed);
    const lowScoreTests = results.filter(r => r.score < PASSING_SCORE);
    const slowTests = results.filter(r => r.executionTime > SLOW_TEST_THRESHOLD_MS);

    if (failedTests.length > 0) {
      recommendations.push(`${failedTests.length} tests failed. Review error messages and fix issues.`);
    }

    if (lowScoreTests.length > 0) {
      recommendations.push(`${lowScoreTests.length} tests have low scores. Improve implementation quality.`);
    }

    if (slowTests.length > 0) {
      recommendations.push(`${slowTests.length} tests are slow. Optimize performance.`);
    }

    const complianceIssues = results.filter(
      r => r.complianceResult && r.complianceResult.complianceScore < LOW_COMPLIANCE_THRESHOLD
    );
    if (complianceIssues.length > 0) {
      recommendations.push(
        `${complianceIssues.length} tests have guide compliance issues. Review maxims and heuristics.`
      );
    }

    return recommendations;
  }
}

/**
 * Global test runner instance
 */
export const globalTestRunner = new IncrementalTestRunner();

/**
 * Predefined test suites for common scenarios
 */
export const PREDEFINED_TEST_SUITES = {
  cognitiveAnalysis: {
    name: 'Cognitive Analysis Tests',
    description: 'Test suite for cognitive analysis function',
    tests: [
      {
        id: 'cognitive-basic',
        name: 'Basic Cognitive Analysis',
        description: 'Test basic cognitive analysis functionality',
        input: {
          researchData: { findings: ['test finding 1', 'test finding 2'] },
          improvedPrompt: 'Create a simple web application',
          analysisDepth: 'deep',
        },
        stepName: 'cognitive',
        category: 'unit' as const,
        priority: 'high' as const,
        expectedCompliance: {
          complianceScore: 70,
          maximsApplied: ['PrimedCognition', 'AppropriateComplexity'],
        },
      },
      {
        id: 'cognitive-vscode-extension',
        name: 'VSCode Extension Domain Analysis',
        description: 'Test domain-specific analysis for VSCode extensions',
        input: {
          researchData: { findings: ['VSCode API documentation', 'Extension best practices'] },
          improvedPrompt: 'Fix webview postMessage error in VSCode extension',
          analysisDepth: 'comprehensive',
        },
        stepName: 'cognitive',
        category: 'domain-specific' as const,
        priority: 'critical' as const,
        expectedCompliance: {
          complianceScore: 80,
          maximsApplied: ['PrimedCognition', 'AppropriateComplexity', 'Autonomy'],
        },
      },
      {
        id: 'cognitive-error-handling',
        name: 'Error Handling Test',
        description: 'Test cognitive analysis with invalid input',
        input: {
          researchData: null,
          improvedPrompt: '',
          analysisDepth: 'deep',
        },
        stepName: 'cognitive',
        category: 'unit' as const,
        priority: 'medium' as const,
      },
    ],
  },

  problemSolver: {
    name: 'Problem Solver Tests',
    description: 'Test suite for problem solver function',
    tests: [
      {
        id: 'problem-solver-basic',
        name: 'Basic Problem Solving',
        description: 'Test basic problem solving functionality',
        input: {
          error: 'TypeError: Cannot read properties of undefined',
          context: { file: 'extension.ts', line: 42 },
          strictMode: true,
        },
        stepName: 'problem-solver',
        category: 'unit' as const,
        priority: 'high' as const,
        expectedCompliance: {
          complianceScore: 75,
          maximsApplied: ['Autonomy', 'Resilience', 'OOTBProblemSolving'],
        },
      },
      {
        id: 'problem-solver-webview',
        name: 'WebView Communication Error',
        description: 'Test solving webview postMessage errors',
        input: {
          error: 'Failed to simulate Enter key: TypeError: Cannot read properties of undefined (reading \'postMessage\')',
          context: {
            component: 'webview',
            action: 'postMessage',
            extension: 'augment-auto-send',
          },
          strictMode: true,
        },
        stepName: 'problem-solver',
        category: 'domain-specific' as const,
        priority: 'critical' as const,
        expectedCompliance: {
          complianceScore: 85,
          maximsApplied: ['Autonomy', 'Resilience', 'PurityAndCleanliness'],
        },
      },
    ],
  },

  guideCompliance: {
    name: 'Guide Compliance Tests',
    description: 'Test suite for guide compliance validation',
    tests: [
      {
        id: 'compliance-all-maxims',
        name: 'All Maxims Applied',
        description: 'Test that all 13 maxims can be applied',
        input: {
          userPrompt: 'Comprehensively improve the MCP workflow server with all guide maxims',
        },
        stepName: 'improve-prompt',
        category: 'compliance' as const,
        priority: 'critical' as const,
        expectedCompliance: {
          complianceScore: 90,
          maximsApplied: [
            'PrimedCognition',
            'AppropriateComplexity',
            'FullyUnleashedPotential',
            'ClearCommunication',
            'PurposefulToolLeveraging',
            'ToolAssistedDiagnosis',
            'Autonomy',
            'PurityAndCleanliness',
            'Perceptivity',
            'Impenetrability',
            'Resilience',
            'Consistency',
            'OperationalFlexibility',
          ],
        },
      },
      {
        id: 'compliance-heuristics',
        name: 'Heuristics Application',
        description: 'Test that heuristics are properly applied',
        input: {
          userPrompt: 'Design a SOLID architecture with SMART goals for responsive UI',
        },
        stepName: 'improve-prompt',
        category: 'compliance' as const,
        priority: 'high' as const,
        expectedCompliance: {
          complianceScore: 80,
          heuristicsApplied: ['SOLID', 'SMART', 'Responsive UI'],
        },
      },
    ],
  },
};

// Register predefined test suites
Object.entries(PREDEFINED_TEST_SUITES).forEach(([name, suite]) => {
  globalTestRunner.registerTestSuite(name, suite);
});