UNPKG

mcp-workflow-server-enhanced

Version:

Enhanced MCP Workflow Server with smart problem routing, comprehensive validation, guide compliance, and robust error handling. Intelligently routes to appropriate AI functions based on problem type.

514 lines (462 loc) 15.2 kB
import { WorkflowContext, GuideCompliance, FunctionOutput } from './types.js';
import { validateGuideCompliance } from './validation.js';
import { globalErrorHandler } from './error-handling.js';

/**
 * Incremental Testing Framework for MCP Workflow Server
 *
 * This framework provides comprehensive testing infrastructure for validating
 * problem-solving accuracy, guide compliance, and workflow functionality.
 */

/** Points earned when the function under test reports `success`. */
const EXECUTION_POINTS = 30;

/** Points earned when the actual result matches `TestCase.expectedOutput`. */
const OUTPUT_POINTS = 30;

/** Maximum points earned from the guide-compliance score (scaled from 0-100). */
const COMPLIANCE_POINTS = 40;

/** Minimum normalised score (0-100) a test needs in order to pass. */
const PASSING_SCORE = 70;

/** Compliance scores below this value produce a warning / recommendation. */
const LOW_COMPLIANCE_THRESHOLD = 70;

/** Tests running longer than this many milliseconds are reported as slow. */
const SLOW_TEST_THRESHOLD_MS = 5000;

/** Signature of a workflow step implementation that a test can exercise. */
type StepHandler = (input: any, context: WorkflowContext) => Promise<FunctionOutput>;

/**
 * A single test definition.
 */
export interface TestCase {
  id: string;
  name: string;
  description: string;
  /** Passed unchanged to input validation and to the step handler. */
  input: any;
  /** When defined, compared against the `result` of the handler's output. */
  expectedOutput?: any;
  /** When defined, unmet expectations are reported as warnings (not errors). */
  expectedCompliance?: Partial<GuideCompliance>;
  /** Key used to look up the handler in `runTestSuite`. */
  stepName: string;
  category: 'unit' | 'integration' | 'compliance' | 'domain-specific';
  priority: 'low' | 'medium' | 'high' | 'critical';
}

/**
 * Outcome of running one {@link TestCase}.
 */
export interface TestResult {
  testId: string;
  passed: boolean;
  score: number; // 0-100
  errors: string[];
  warnings: string[];
  actualOutput?: any;
  complianceResult?: GuideCompliance;
  /** Wall-clock duration in milliseconds. */
  executionTime: number;
  metadata: Record<string, any>;
}

/**
 * A named group of test cases with optional lifecycle hooks.
 */
export interface TestSuite {
  name: string;
  description: string;
  tests: TestCase[];
  setup?: () => Promise<void>;
  teardown?: () => Promise<void>;
}

/**
 * Returns true for arrays and for objects whose prototype is
 * `Object.prototype` or `null`, i.e. values that are safe to compare
 * structurally key by key.
 */
function isPlainContainer(value: unknown): boolean {
  if (Array.isArray(value)) {
    return true;
  }
  if (typeof value !== 'object' || value === null) {
    return false;
  }
  const prototype = Object.getPrototypeOf(value);
  return prototype === Object.prototype || prototype === null;
}

/**
 * Formats a value for a mismatch message. Objects are rendered as JSON so the
 * message does not degrade to "[object Object]"; everything else uses
 * `String()`.
 */
function describeValue(value: unknown): string {
  if (typeof value === 'object' && value !== null) {
    try {
      return JSON.stringify(value) ?? String(value);
    } catch {
      // e.g. circular structures; fall back to the default string form.
      return String(value);
    }
  }
  return String(value);
}

/**
 * Test runner for incremental validation
 */
export class IncrementalTestRunner {
  private testSuites: Map<string, TestSuite> = new Map();
  private results: Map<string, TestResult[]> = new Map();

  /**
   * Register a test suite
   *
   * Registering under an existing name replaces the previous suite.
   */
  registerTestSuite(name: string, suite: TestSuite): void {
    this.testSuites.set(name, suite);
  }

  /**
   * Run a specific test case
   *
   * Scoring: up to 30 points for successful execution, 30 for matching
   * `expectedOutput` and 40 for guide compliance. The total is normalised to
   * the points that were actually available, so a test that defines no
   * `expectedOutput` is scored out of 70 rather than being capped at 70/100.
   * Tests that do define `expectedOutput` are scored exactly as before.
   *
   * A test passes when it recorded no errors and its normalised score is at
   * least 70. Any exception thrown while running is captured as an error and
   * forces the score to 0; this method itself does not throw for handler
   * failures.
   *
   * @param testCase - The test definition to execute.
   * @param functionHandler - Implementation of the step under test.
   * @param context - Workflow context forwarded to the handler and to
   *   compliance validation.
   * @returns The populated test result.
   */
  async runTest(
    testCase: TestCase,
    functionHandler: (input: any, context: WorkflowContext) => Promise<FunctionOutput>,
    context: WorkflowContext
  ): Promise<TestResult> {
    const startTime = Date.now();
    const errors: string[] = [];
    const warnings: string[] = [];
    let passed = false;
    let score = 0;
    let actualOutput: any;
    let complianceResult: GuideCompliance | undefined;

    // `!== undefined` (not truthiness) so that 0, '' and false are valid expectations.
    const hasExpectedOutput = testCase.expectedOutput !== undefined;
    const availablePoints =
      EXECUTION_POINTS + COMPLIANCE_POINTS + (hasExpectedOutput ? OUTPUT_POINTS : 0);

    try {
      let earnedPoints = 0;

      // Validate input. Failures are recorded but execution still proceeds.
      const inputValidation = globalErrorHandler.validateInput(testCase.input, testCase.stepName);
      if (!inputValidation.valid) {
        errors.push(...inputValidation.errors);
      }

      // Execute the function
      actualOutput = await functionHandler(testCase.input, context);

      // Check if function succeeded
      if (!actualOutput.success) {
        errors.push(`Function execution failed: ${actualOutput.result?.error || 'Unknown error'}`);
      } else {
        earnedPoints += EXECUTION_POINTS; // Base score for successful execution
      }

      // Validate expected output if provided
      if (hasExpectedOutput) {
        const outputMatch = this.compareOutputs(actualOutput.result, testCase.expectedOutput);
        if (outputMatch.matches) {
          earnedPoints += OUTPUT_POINTS; // Score for correct output
        } else {
          errors.push(`Output mismatch: ${outputMatch.differences.join(', ')}`);
        }
      }

      // Check guide compliance (only meaningful for successful executions)
      if (actualOutput.success) {
        complianceResult = await validateGuideCompliance(
          testCase.stepName,
          testCase.input,
          actualOutput,
          context
        );

        // Score based on compliance: a 0-100 compliance score maps to 0-40 points.
        earnedPoints += Math.round(
          complianceResult.complianceScore * (COMPLIANCE_POINTS / 100)
        );

        if (complianceResult.complianceScore < LOW_COMPLIANCE_THRESHOLD) {
          warnings.push(`Low guide compliance: ${complianceResult.complianceScore}%`);
        }

        // Check expected compliance if provided
        if (testCase.expectedCompliance) {
          const complianceMatch = this.checkComplianceExpectations(
            complianceResult,
            testCase.expectedCompliance
          );
          if (!complianceMatch.matches) {
            warnings.push(...complianceMatch.issues);
          }
        }
      }

      // Normalise to 0-100 against the points this test could actually earn.
      score = Math.round((earnedPoints * 100) / availablePoints);

      // Determine if test passed
      passed = errors.length === 0 && score >= PASSING_SCORE;
    } catch (error) {
      // Thrown values are not guaranteed to be Error instances.
      const message = error instanceof Error ? error.message : String(error);
      errors.push(`Test execution error: ${message}`);
      score = 0;
    }

    const executionTime = Date.now() - startTime;

    return {
      testId: testCase.id,
      passed,
      score,
      errors,
      warnings,
      actualOutput,
      complianceResult,
      executionTime,
      metadata: {
        category: testCase.category,
        priority: testCase.priority,
        stepName: testCase.stepName,
      },
    };
  }

  /**
   * Run all tests in a suite
   *
   * Tests run sequentially in declaration order. A test whose `stepName` has
   * no entry in `functionHandlers` is recorded as failed without being run.
   * `teardown` runs in a `finally` block once `setup` has completed; if
   * `setup` itself rejects, `teardown` is not invoked. The suite's stored
   * results are replaced on every run.
   *
   * @param suiteName - Name the suite was registered under.
   * @param functionHandlers - Handlers keyed by step name.
   * @param context - Workflow context forwarded to every test.
   * @returns One result per test case in the suite.
   * @throws Error if no suite is registered under `suiteName`.
   */
  async runTestSuite(
    suiteName: string,
    functionHandlers: Map<string, (input: any, context: WorkflowContext) => Promise<FunctionOutput>>,
    context: WorkflowContext
  ): Promise<TestResult[]> {
    const suite = this.testSuites.get(suiteName);
    if (!suite) {
      throw new Error(`Test suite not found: ${suiteName}`);
    }

    const results: TestResult[] = [];

    // Run setup if provided
    if (suite.setup) {
      await suite.setup();
    }

    try {
      for (const testCase of suite.tests) {
        const handler: StepHandler | undefined = functionHandlers.get(testCase.stepName);
        if (!handler) {
          results.push({
            testId: testCase.id,
            passed: false,
            score: 0,
            errors: [`No handler found for step: ${testCase.stepName}`],
            warnings: [],
            executionTime: 0,
            // Same metadata shape as results produced by runTest.
            metadata: {
              category: testCase.category,
              priority: testCase.priority,
              stepName: testCase.stepName,
            },
          });
          continue;
        }

        results.push(await this.runTest(testCase, handler, context));
      }
    } finally {
      // Run teardown if provided
      if (suite.teardown) {
        await suite.teardown();
      }
    }

    this.results.set(suiteName, results);
    return results;
  }

  /**
   * Generate test report
   *
   * Aggregates results stored by previous `runTestSuite` calls.
   *
   * @param suiteName - When given, only that suite's results are reported
   *   (an unknown name yields an empty report); otherwise all suites are
   *   combined.
   * @returns Summary statistics (averages are 0 when there are no results),
   *   the individual results, and textual recommendations.
   */
  generateReport(suiteName?: string): {
    summary: {
      totalTests: number;
      passed: number;
      failed: number;
      averageScore: number;
      averageExecutionTime: number;
    };
    details: TestResult[];
    recommendations: string[];
  } {
    let allResults: TestResult[] = [];

    if (suiteName) {
      allResults = this.results.get(suiteName) || [];
    } else {
      for (const results of this.results.values()) {
        allResults.push(...results);
      }
    }

    const totalTests = allResults.length;
    const passed = allResults.filter(r => r.passed).length;
    const failed = totalTests - passed;
    const averageScore = totalTests > 0
      ? allResults.reduce((sum, r) => sum + r.score, 0) / totalTests
      : 0;
    const averageExecutionTime = totalTests > 0
      ? allResults.reduce((sum, r) => sum + r.executionTime, 0) / totalTests
      : 0;

    const recommendations = this.generateRecommendations(allResults);

    return {
      summary: {
        totalTests,
        passed,
        failed,
        averageScore,
        averageExecutionTime,
      },
      details: allResults,
      recommendations,
    };
  }

  /**
   * Compare actual vs expected outputs
   *
   * `expected` acts as a template: every property it defines must be present
   * in `actual` with an equal value, while additional properties on `actual`
   * are ignored. Nested plain objects and arrays are compared structurally
   * with the same rule; all other values are compared with `===`.
   *
   * @returns `matches` plus a human-readable list of differences.
   */
  private compareOutputs(actual: any, expected: any): {
    matches: boolean;
    differences: string[];
  } {
    const differences: string[] = [];

    if (typeof actual !== typeof expected) {
      differences.push(`Type mismatch: expected ${typeof expected}, got ${typeof actual}`);
      return { matches: false, differences };
    }

    if (typeof actual === 'object' && actual !== null && expected !== null) {
      this.collectDifferences(actual, expected, '', differences);
    } else if (actual !== expected) {
      differences.push(`Value mismatch: expected ${expected}, got ${actual}`);
    }

    return {
      matches: differences.length === 0,
      differences,
    };
  }

  /**
   * Appends to `differences` one message for each property of `expected`
   * that is missing from, or differs in, `actual`. Recurses into nested
   * plain objects/arrays; `path` is the dotted prefix of the current level.
   */
  private collectDifferences(
    actual: any,
    expected: any,
    path: string,
    differences: string[]
  ): void {
    for (const key of Object.keys(expected)) {
      const propertyPath = path ? `${path}.${key}` : key;

      if (!(key in actual)) {
        differences.push(`Missing property: ${propertyPath}`);
        continue;
      }

      const actualValue = actual[key];
      const expectedValue = expected[key];
      if (actualValue === expectedValue) {
        continue;
      }

      // Only recurse when both sides are the same kind of plain container;
      // class instances (Date, Map, ...) keep identity comparison.
      const bothContainers =
        isPlainContainer(actualValue) &&
        isPlainContainer(expectedValue) &&
        Array.isArray(actualValue) === Array.isArray(expectedValue);

      if (bothContainers) {
        this.collectDifferences(actualValue, expectedValue, propertyPath, differences);
      } else {
        differences.push(
          `Property ${propertyPath}: expected ${describeValue(expectedValue)}, got ${describeValue(actualValue)}`
        );
      }
    }
  }

  /**
   * Check compliance expectations
   *
   * Verifies that the actual compliance score is at least the expected score
   * and that every expected maxim was applied.
   *
   * NOTE(review): only `complianceScore` and `maximsApplied` are inspected.
   * A predefined suite below also specifies `heuristicsApplied`, which is
   * currently ignored here — confirm against the GuideCompliance type whether
   * it should be checked as well.
   */
  private checkComplianceExpectations(
    actual: GuideCompliance,
    expected: Partial<GuideCompliance>
  ): { matches: boolean; issues: string[] } {
    const issues: string[] = [];

    if (expected.complianceScore !== undefined) {
      if (actual.complianceScore < expected.complianceScore) {
        issues.push(`Compliance score below expected: ${actual.complianceScore} < ${expected.complianceScore}`);
      }
    }

    if (expected.maximsApplied) {
      for (const maxim of expected.maximsApplied) {
        if (!actual.maximsApplied.includes(maxim)) {
          issues.push(`Expected maxim not applied: ${maxim}`);
        }
      }
    }

    return {
      matches: issues.length === 0,
      issues,
    };
  }

  /**
   * Generate recommendations based on test results
   *
   * Emits at most one message each for failed tests, low scores, slow tests
   * and low guide compliance; returns an empty array when nothing applies.
   */
  private generateRecommendations(results: TestResult[]): string[] {
    const recommendations: string[] = [];

    const failedTests = results.filter(r => !r.passed);
    const lowScoreTests = results.filter(r => r.score < PASSING_SCORE);
    const slowTests = results.filter(r => r.executionTime > SLOW_TEST_THRESHOLD_MS); // > 5 seconds

    if (failedTests.length > 0) {
      recommendations.push(`${failedTests.length} tests failed. Review error messages and fix issues.`);
    }

    if (lowScoreTests.length > 0) {
      recommendations.push(`${lowScoreTests.length} tests have low scores. Improve implementation quality.`);
    }

    if (slowTests.length > 0) {
      recommendations.push(`${slowTests.length} tests are slow. Optimize performance.`);
    }

    const complianceIssues = results.filter(r =>
      r.complianceResult && r.complianceResult.complianceScore < LOW_COMPLIANCE_THRESHOLD
    );

    if (complianceIssues.length > 0) {
      recommendations.push(`${complianceIssues.length} tests have guide compliance issues. Review maxims and heuristics.`);
    }

    return recommendations;
  }
}

/**
 * Global test runner instance
 */
export const globalTestRunner = new IncrementalTestRunner();

/**
 * Predefined test suites for common scenarios
 */
export const PREDEFINED_TEST_SUITES = {
  cognitiveAnalysis: {
    name: 'Cognitive Analysis Tests',
    description: 'Test suite for cognitive analysis function',
    tests: [
      {
        id: 'cognitive-basic',
        name: 'Basic Cognitive Analysis',
        description: 'Test basic cognitive analysis functionality',
        input: {
          researchData: { findings: ['test finding 1', 'test finding 2'] },
          improvedPrompt: 'Create a simple web application',
          analysisDepth: 'deep',
        },
        stepName: 'cognitive',
        category: 'unit' as const,
        priority: 'high' as const,
        expectedCompliance: {
          complianceScore: 70,
          maximsApplied: ['PrimedCognition', 'AppropriateComplexity'],
        },
      },
      {
        id: 'cognitive-vscode-extension',
        name: 'VSCode Extension Domain Analysis',
        description: 'Test domain-specific analysis for VSCode extensions',
        input: {
          researchData: { findings: ['VSCode API documentation', 'Extension best practices'] },
          improvedPrompt: 'Fix webview postMessage error in VSCode extension',
          analysisDepth: 'comprehensive',
        },
        stepName: 'cognitive',
        category: 'domain-specific' as const,
        priority: 'critical' as const,
        expectedCompliance: {
          complianceScore: 80,
          maximsApplied: ['PrimedCognition', 'AppropriateComplexity', 'Autonomy'],
        },
      },
      {
        id: 'cognitive-error-handling',
        name: 'Error Handling Test',
        description: 'Test cognitive analysis with invalid input',
        input: {
          researchData: null,
          improvedPrompt: '',
          analysisDepth: 'deep',
        },
        stepName: 'cognitive',
        category: 'unit' as const,
        priority: 'medium' as const,
      },
    ],
  },
  problemSolver: {
    name: 'Problem Solver Tests',
    description: 'Test suite for problem solver function',
    tests: [
      {
        id: 'problem-solver-basic',
        name: 'Basic Problem Solving',
        description: 'Test basic problem solving functionality',
        input: {
          error: 'TypeError: Cannot read properties of undefined',
          context: { file: 'extension.ts', line: 42 },
          strictMode: true,
        },
        stepName: 'problem-solver',
        category: 'unit' as const,
        priority: 'high' as const,
        expectedCompliance: {
          complianceScore: 75,
          maximsApplied: ['Autonomy', 'Resilience', 'OOTBProblemSolving'],
        },
      },
      {
        id: 'problem-solver-webview',
        name: 'WebView Communication Error',
        description: 'Test solving webview postMessage errors',
        input: {
          error: 'Failed to simulate Enter key: TypeError: Cannot read properties of undefined (reading \'postMessage\')',
          context: {
            component: 'webview',
            action: 'postMessage',
            extension: 'augment-auto-send',
          },
          strictMode: true,
        },
        stepName: 'problem-solver',
        category: 'domain-specific' as const,
        priority: 'critical' as const,
        expectedCompliance: {
          complianceScore: 85,
          maximsApplied: ['Autonomy', 'Resilience', 'PurityAndCleanliness'],
        },
      },
    ],
  },
  guideCompliance: {
    name: 'Guide Compliance Tests',
    description: 'Test suite for guide compliance validation',
    tests: [
      {
        id: 'compliance-all-maxims',
        name: 'All Maxims Applied',
        description: 'Test that all 13 maxims can be applied',
        input: {
          userPrompt: 'Comprehensively improve the MCP workflow server with all guide maxims',
        },
        stepName: 'improve-prompt',
        category: 'compliance' as const,
        priority: 'critical' as const,
        expectedCompliance: {
          complianceScore: 90,
          maximsApplied: [
            'PrimedCognition',
            'AppropriateComplexity',
            'FullyUnleashedPotential',
            'ClearCommunication',
            'PurposefulToolLeveraging',
            'ToolAssistedDiagnosis',
            'Autonomy',
            'PurityAndCleanliness',
            'Perceptivity',
            'Impenetrability',
            'Resilience',
            'Consistency',
            'OperationalFlexibility',
          ],
        },
      },
      {
        id: 'compliance-heuristics',
        name: 'Heuristics Application',
        description: 'Test that heuristics are properly applied',
        input: {
          userPrompt: 'Design a SOLID architecture with SMART goals for responsive UI',
        },
        stepName: 'improve-prompt',
        category: 'compliance' as const,
        priority: 'high' as const,
        expectedCompliance: {
          complianceScore: 80,
          heuristicsApplied: ['SOLID', 'SMART', 'Responsive UI'],
        },
      },
    ],
  },
};

// Register predefined test suites
for (const [name, suite] of Object.entries(PREDEFINED_TEST_SUITES)) {
  globalTestRunner.registerTestSuite(name, suite);
}