mcp-workflow-server-enhanced
Version:
Enhanced MCP Workflow Server with smart problem routing, comprehensive validation, guide compliance, and robust error handling. Intelligently routes to appropriate AI functions based on problem type.
514 lines (462 loc) • 15.2 kB
text/typescript
import { WorkflowContext, GuideCompliance, FunctionOutput } from './types.js';
import { validateGuideCompliance } from './validation.js';
import { globalErrorHandler } from './error-handling.js';
/**
* Incremental Testing Framework for MCP Workflow Server
*
* This framework provides comprehensive testing infrastructure for validating
* problem-solving accuracy, guide compliance, and workflow functionality.
*/
/**
 * Declarative description of one test scenario executed by
 * `IncrementalTestRunner`.
 */
export interface TestCase {
  // --- Identification ---

  /** Identifier copied into `TestResult.testId`. */
  id: string;
  /** Short human-readable title. */
  name: string;
  /** Longer explanation of what the scenario exercises. */
  description: string;

  // --- Classification ---

  /**
   * Workflow step under test. `runTestSuite` uses it as the key into the
   * handler map, and `runTest` forwards it to input validation and to the
   * guide-compliance check.
   */
  stepName: string;
  /** Kind of test; copied into the result metadata. */
  category: 'unit' | 'integration' | 'compliance' | 'domain-specific';
  /** Importance of the test; copied into the result metadata. */
  priority: 'low' | 'medium' | 'high' | 'critical';

  // --- Payload and expectations ---

  /** Value passed as the first argument to the step handler. */
  input: any;
  /** Optional expected value for the `result` field of the handler output. */
  expectedOutput?: any;
  /** Optional minimum compliance expectations for the handler output. */
  expectedCompliance?: Partial<GuideCompliance>;
}
/**
 * Outcome recorded by `IncrementalTestRunner` for a single `TestCase`.
 */
export interface TestResult {
  /** `id` of the test case this result belongs to. */
  testId: string;

  // --- Verdict ---

  /** Overall verdict assigned by the runner. */
  passed: boolean;
  /** Score on a 0-100 scale. */
  score: number;

  // --- Diagnostics ---

  /** Problems recorded while running the test (validation, execution, output mismatch). */
  errors: Array<string>;
  /** Non-fatal findings such as low or unmet compliance expectations. */
  warnings: Array<string>;

  // --- Captured data ---

  /** Value returned by the step handler, when it was invoked. */
  actualOutput?: any;
  /** Result of the guide-compliance check, when it was performed. */
  complianceResult?: GuideCompliance;

  /** Elapsed time in milliseconds (difference of two `Date.now()` readings). */
  executionTime: number;
  /** Extra information about the test, e.g. its category and priority. */
  metadata: Record<string, any>;
}
/**
 * A named group of test cases with optional lifecycle hooks.
 */
export interface TestSuite {
  /** Display name of the suite. */
  name: string;
  /** What the suite covers. */
  description: string;
  /** Test cases, executed by `runTestSuite` in array order. */
  tests: Array<TestCase>;
  /** Optional hook awaited by `runTestSuite` before the first test case. */
  setup?: () => Promise<void>;
  /** Optional hook awaited by `runTestSuite` after the test loop, in a `finally` block. */
  teardown?: () => Promise<void>;
}
/**
 * Test runner for incremental validation.
 *
 * Suites are registered by name, executed against a map of step handlers, and
 * their results are retained so that `generateReport` can summarise them.
 *
 * Scoring: a test earns points for successful execution, for matching the
 * expected output (only when the test case declares one) and for guide
 * compliance. The earned points are rescaled to 0-100 over the points that
 * were actually attainable for that test case, so a test without
 * `expectedOutput` is not penalised for the missing comparison.
 */
export class IncrementalTestRunner {
  /** Points earned when the handler output reports `success`. */
  private static readonly EXECUTION_POINTS = 30;
  /** Points earned when the handler result matches `expectedOutput`. */
  private static readonly OUTPUT_POINTS = 30;
  /** Maximum points earned from the compliance score (100% compliance). */
  private static readonly COMPLIANCE_POINTS = 40;
  /** Minimum score a test needs in order to pass. */
  private static readonly PASSING_SCORE = 70;
  /** Compliance percentage below which a warning/recommendation is issued. */
  private static readonly LOW_COMPLIANCE_THRESHOLD = 70;
  /** Execution time (ms) above which a test is reported as slow. */
  private static readonly SLOW_TEST_MS = 5000;

  private testSuites: Map<string, TestSuite> = new Map();
  private results: Map<string, TestResult[]> = new Map();

  /**
   * Register a test suite under `name`, replacing any suite already stored
   * under the same name.
   */
  registerTestSuite(name: string, suite: TestSuite): void {
    this.testSuites.set(name, suite);
  }

  /**
   * Run a specific test case.
   *
   * Never rejects: any exception raised while validating, executing or
   * checking compliance is recorded in `errors` and yields a score of 0.
   *
   * @param testCase - Scenario to execute.
   * @param functionHandler - Step implementation under test.
   * @param context - Workflow context forwarded to the handler and to the compliance check.
   * @returns The recorded result, including timing and metadata.
   */
  async runTest(
    testCase: TestCase,
    functionHandler: (input: any, context: WorkflowContext) => Promise<FunctionOutput>,
    context: WorkflowContext
  ): Promise<TestResult> {
    const startTime = Date.now();
    const errors: string[] = [];
    const warnings: string[] = [];
    let passed = false;
    let score = 0;
    let actualOutput: any;
    let complianceResult: GuideCompliance | undefined;

    // `!== undefined` rather than truthiness so that falsy expectations
    // (0, '', false, null) are still verified.
    const hasExpectedOutput = testCase.expectedOutput !== undefined;
    const attainablePoints =
      IncrementalTestRunner.EXECUTION_POINTS +
      IncrementalTestRunner.COMPLIANCE_POINTS +
      (hasExpectedOutput ? IncrementalTestRunner.OUTPUT_POINTS : 0);

    try {
      let earnedPoints = 0;

      // Validate input. Failures are recorded but the handler is still run.
      const inputValidation = globalErrorHandler.validateInput(testCase.input, testCase.stepName);
      if (!inputValidation.valid) {
        errors.push(...inputValidation.errors);
      }

      // Execute the function
      actualOutput = await functionHandler(testCase.input, context);

      // Check if function succeeded
      if (!actualOutput.success) {
        errors.push(`Function execution failed: ${actualOutput.result?.error || 'Unknown error'}`);
      } else {
        earnedPoints += IncrementalTestRunner.EXECUTION_POINTS;
      }

      // Validate expected output if provided
      if (hasExpectedOutput) {
        const outputMatch = this.compareOutputs(actualOutput.result, testCase.expectedOutput);
        if (outputMatch.matches) {
          earnedPoints += IncrementalTestRunner.OUTPUT_POINTS;
        } else {
          errors.push(`Output mismatch: ${outputMatch.differences.join(', ')}`);
        }
      }

      // Check guide compliance (only meaningful for successful executions)
      if (actualOutput.success) {
        complianceResult = await validateGuideCompliance(
          testCase.stepName,
          testCase.input,
          actualOutput,
          context
        );

        // Compliance percentage scaled to its share of the points.
        earnedPoints += Math.round(
          complianceResult.complianceScore * (IncrementalTestRunner.COMPLIANCE_POINTS / 100)
        );
        if (complianceResult.complianceScore < IncrementalTestRunner.LOW_COMPLIANCE_THRESHOLD) {
          warnings.push(`Low guide compliance: ${complianceResult.complianceScore}%`);
        }

        // Unmet compliance expectations are warnings, not errors.
        if (testCase.expectedCompliance) {
          const complianceMatch = this.checkComplianceExpectations(
            complianceResult,
            testCase.expectedCompliance
          );
          if (!complianceMatch.matches) {
            warnings.push(...complianceMatch.issues);
          }
        }
      }

      // Rescale over what this test case could actually earn. With an
      // expected output the denominator is 100, so the score is unchanged.
      score = Math.round((earnedPoints / attainablePoints) * 100);

      // Determine if test passed
      passed = errors.length === 0 && score >= IncrementalTestRunner.PASSING_SCORE;
    } catch (error) {
      // Anything can be thrown in JavaScript; only Error instances have `.message`.
      const message = error instanceof Error ? error.message : String(error);
      errors.push(`Test execution error: ${message}`);
      score = 0;
      passed = false;
    }

    const executionTime = Date.now() - startTime;
    return {
      testId: testCase.id,
      passed,
      score,
      errors,
      warnings,
      actualOutput,
      complianceResult,
      executionTime,
      metadata: {
        category: testCase.category,
        priority: testCase.priority,
        stepName: testCase.stepName,
      },
    };
  }

  /**
   * Run all tests in a suite, sequentially and in declaration order.
   *
   * A test whose `stepName` has no entry in `functionHandlers` is recorded as
   * failed without being executed. `teardown` runs even if a test throws;
   * `setup` runs before the protected region, so a failing `setup` skips
   * `teardown`.
   *
   * @param suiteName - Name the suite was registered under.
   * @param functionHandlers - Step handlers keyed by `TestCase.stepName`.
   * @param context - Workflow context passed to every test.
   * @returns The results, which are also stored for `generateReport`.
   * @throws Error if no suite is registered under `suiteName`.
   */
  async runTestSuite(
    suiteName: string,
    functionHandlers: Map<string, (input: any, context: WorkflowContext) => Promise<FunctionOutput>>,
    context: WorkflowContext
  ): Promise<TestResult[]> {
    const suite = this.testSuites.get(suiteName);
    if (!suite) {
      throw new Error(`Test suite not found: ${suiteName}`);
    }

    const results: TestResult[] = [];

    // Run setup if provided
    if (suite.setup) {
      await suite.setup();
    }

    try {
      for (const testCase of suite.tests) {
        const handler = functionHandlers.get(testCase.stepName);
        if (!handler) {
          results.push({
            testId: testCase.id,
            passed: false,
            score: 0,
            errors: [`No handler found for step: ${testCase.stepName}`],
            warnings: [],
            executionTime: 0,
            // Same metadata shape as results produced by runTest.
            metadata: {
              category: testCase.category,
              priority: testCase.priority,
              stepName: testCase.stepName,
            },
          });
          continue;
        }
        results.push(await this.runTest(testCase, handler, context));
      }
    } finally {
      // Run teardown if provided
      if (suite.teardown) {
        await suite.teardown();
      }
    }

    this.results.set(suiteName, results);
    return results;
  }

  /**
   * Generate test report.
   *
   * @param suiteName - When given, only that suite's stored results are
   *   reported (an unknown name yields an empty report); otherwise the
   *   results of every suite run so far are combined.
   * @returns Aggregate summary, the individual results and recommendations.
   */
  generateReport(suiteName?: string): {
    summary: {
      totalTests: number;
      passed: number;
      failed: number;
      averageScore: number;
      averageExecutionTime: number;
    };
    details: TestResult[];
    recommendations: string[];
  } {
    let allResults: TestResult[] = [];
    if (suiteName) {
      allResults = this.results.get(suiteName) || [];
    } else {
      for (const results of this.results.values()) {
        allResults.push(...results);
      }
    }

    const totalTests = allResults.length;
    const passed = allResults.filter(r => r.passed).length;
    const failed = totalTests - passed;
    // Averages are reported as 0 for an empty result set to avoid NaN.
    const averageScore = totalTests > 0
      ? allResults.reduce((sum, r) => sum + r.score, 0) / totalTests
      : 0;
    const averageExecutionTime = totalTests > 0
      ? allResults.reduce((sum, r) => sum + r.executionTime, 0) / totalTests
      : 0;

    return {
      summary: {
        totalTests,
        passed,
        failed,
        averageScore,
        averageExecutionTime,
      },
      details: allResults,
      recommendations: this.generateRecommendations(allResults),
    };
  }

  /**
   * Compare actual vs expected outputs.
   *
   * `expected` acts as a template: every property it declares must be present
   * in `actual` with a matching value, compared recursively; additional
   * properties on `actual` are ignored. Arrays must have the same length.
   */
  private compareOutputs(actual: any, expected: any): {
    matches: boolean;
    differences: string[];
  } {
    const differences: string[] = [];
    this.collectDifferences(actual, expected, '', differences);
    return {
      matches: differences.length === 0,
      differences,
    };
  }

  /**
   * Recursive worker for `compareOutputs`.
   *
   * @param path - Dotted property path of the values being compared; empty at the root.
   * @param differences - Accumulator that receives one message per mismatch.
   */
  private collectDifferences(actual: any, expected: any, path: string, differences: string[]): void {
    const atRoot = path === '';

    if (typeof actual !== typeof expected) {
      differences.push(
        atRoot
          ? `Type mismatch: expected ${typeof expected}, got ${typeof actual}`
          : `Property ${path}: type mismatch, expected ${typeof expected}, got ${typeof actual}`
      );
      return;
    }

    // `typeof null` is 'object', so null is excluded explicitly and handled
    // by the plain value comparison below.
    if (typeof actual === 'object' && actual !== null && expected !== null) {
      const expectedIsArray = Array.isArray(expected);
      if (expectedIsArray !== Array.isArray(actual)) {
        const wanted = expectedIsArray ? 'array' : 'object';
        const got = expectedIsArray ? 'object' : 'array';
        differences.push(
          atRoot
            ? `Type mismatch: expected ${wanted}, got ${got}`
            : `Property ${path}: type mismatch, expected ${wanted}, got ${got}`
        );
        return;
      }
      if (expectedIsArray && actual.length !== expected.length) {
        differences.push(
          `${atRoot ? 'Array' : `Property ${path}:`} length mismatch: expected ${expected.length}, got ${actual.length}`
        );
      }
      for (const key of Object.keys(expected)) {
        const childPath = atRoot ? key : `${path}.${key}`;
        if (!(key in actual)) {
          differences.push(`Missing property: ${childPath}`);
        } else {
          this.collectDifferences(actual[key], expected[key], childPath, differences);
        }
      }
      return;
    }

    if (actual !== expected) {
      const wanted = IncrementalTestRunner.describeValue(expected);
      const got = IncrementalTestRunner.describeValue(actual);
      differences.push(
        atRoot
          ? `Value mismatch: expected ${wanted}, got ${got}`
          : `Property ${path}: expected ${wanted}, got ${got}`
      );
    }
  }

  /**
   * Render a value for a difference message. Objects are serialised as JSON
   * instead of the uninformative `[object Object]`; primitives use `String`.
   */
  private static describeValue(value: unknown): string {
    if (typeof value === 'object' && value !== null) {
      try {
        return JSON.stringify(value);
      } catch {
        // e.g. circular structures; fall back to the default conversion.
        return String(value);
      }
    }
    return String(value);
  }

  /**
   * Check compliance expectations.
   *
   * `expected.complianceScore` is a minimum; every entry of
   * `expected.maximsApplied` and `expected.heuristicsApplied` must appear in
   * the corresponding list of `actual`. A list missing from `actual` is
   * treated as empty.
   */
  private checkComplianceExpectations(
    actual: GuideCompliance,
    expected: Partial<GuideCompliance>
  ): { matches: boolean; issues: string[] } {
    const issues: string[] = [];

    if (expected.complianceScore !== undefined) {
      if (actual.complianceScore < expected.complianceScore) {
        issues.push(`Compliance score below expected: ${actual.complianceScore} < ${expected.complianceScore}`);
      }
    }

    if (expected.maximsApplied) {
      const appliedMaxims = actual.maximsApplied ?? [];
      for (const maxim of expected.maximsApplied) {
        if (!appliedMaxims.includes(maxim)) {
          issues.push(`Expected maxim not applied: ${maxim}`);
        }
      }
    }

    if (expected.heuristicsApplied) {
      const appliedHeuristics = actual.heuristicsApplied ?? [];
      for (const heuristic of expected.heuristicsApplied) {
        if (!appliedHeuristics.includes(heuristic)) {
          issues.push(`Expected heuristic not applied: ${heuristic}`);
        }
      }
    }

    return {
      matches: issues.length === 0,
      issues,
    };
  }

  /**
   * Generate recommendations based on test results: one message per problem
   * class (failures, low scores, slow tests, low compliance) that has at
   * least one affected test.
   */
  private generateRecommendations(results: TestResult[]): string[] {
    const recommendations: string[] = [];

    const failedTests = results.filter(r => !r.passed);
    const lowScoreTests = results.filter(r => r.score < IncrementalTestRunner.PASSING_SCORE);
    const slowTests = results.filter(r => r.executionTime > IncrementalTestRunner.SLOW_TEST_MS);
    const complianceIssues = results.filter(r =>
      r.complianceResult &&
      r.complianceResult.complianceScore < IncrementalTestRunner.LOW_COMPLIANCE_THRESHOLD
    );

    if (failedTests.length > 0) {
      recommendations.push(`${failedTests.length} tests failed. Review error messages and fix issues.`);
    }
    if (lowScoreTests.length > 0) {
      recommendations.push(`${lowScoreTests.length} tests have low scores. Improve implementation quality.`);
    }
    if (slowTests.length > 0) {
      recommendations.push(`${slowTests.length} tests are slow. Optimize performance.`);
    }
    if (complianceIssues.length > 0) {
      recommendations.push(`${complianceIssues.length} tests have guide compliance issues. Review maxims and heuristics.`);
    }

    return recommendations;
  }
}
/**
 * Module-wide runner instance. The predefined suites declared in this file
 * are registered on it when the module is loaded.
 */
export const globalTestRunner: IncrementalTestRunner = new IncrementalTestRunner();
/**
 * Predefined test suites for common scenarios.
 *
 * Each property is a suite (name, description, tests) and is registered on
 * `globalTestRunner` under its property key at the bottom of this file.
 * A test's `stepName` is the key `runTestSuite` uses to look up the handler
 * in the handler map supplied by the caller.
 *
 * `category` and `priority` carry `as const` so the literals keep their
 * narrow types and stay assignable to the unions declared on `TestCase`.
 */
export const PREDEFINED_TEST_SUITES = {
// Suite for the 'cognitive' step.
cognitiveAnalysis: {
name: 'Cognitive Analysis Tests',
description: 'Test suite for cognitive analysis function',
tests: [
{
id: 'cognitive-basic',
name: 'Basic Cognitive Analysis',
description: 'Test basic cognitive analysis functionality',
input: {
researchData: { findings: ['test finding 1', 'test finding 2'] },
improvedPrompt: 'Create a simple web application',
analysisDepth: 'deep',
},
stepName: 'cognitive',
category: 'unit' as const,
priority: 'high' as const,
// complianceScore here is a minimum, not an exact value.
expectedCompliance: {
complianceScore: 70,
maximsApplied: ['PrimedCognition', 'AppropriateComplexity'],
},
},
{
id: 'cognitive-vscode-extension',
name: 'VSCode Extension Domain Analysis',
description: 'Test domain-specific analysis for VSCode extensions',
input: {
researchData: { findings: ['VSCode API documentation', 'Extension best practices'] },
improvedPrompt: 'Fix webview postMessage error in VSCode extension',
analysisDepth: 'comprehensive',
},
stepName: 'cognitive',
category: 'domain-specific' as const,
priority: 'critical' as const,
expectedCompliance: {
complianceScore: 80,
maximsApplied: ['PrimedCognition', 'AppropriateComplexity', 'Autonomy'],
},
},
{
// Invalid-input scenario: null research data and an empty prompt.
// No expectedOutput or expectedCompliance is declared for this case.
id: 'cognitive-error-handling',
name: 'Error Handling Test',
description: 'Test cognitive analysis with invalid input',
input: {
researchData: null,
improvedPrompt: '',
analysisDepth: 'deep',
},
stepName: 'cognitive',
category: 'unit' as const,
priority: 'medium' as const,
},
],
},
// Suite for the 'problem-solver' step.
problemSolver: {
name: 'Problem Solver Tests',
description: 'Test suite for problem solver function',
tests: [
{
id: 'problem-solver-basic',
name: 'Basic Problem Solving',
description: 'Test basic problem solving functionality',
input: {
error: 'TypeError: Cannot read properties of undefined',
context: { file: 'extension.ts', line: 42 },
strictMode: true,
},
stepName: 'problem-solver',
category: 'unit' as const,
priority: 'high' as const,
expectedCompliance: {
complianceScore: 75,
maximsApplied: ['Autonomy', 'Resilience', 'OOTBProblemSolving'],
},
},
{
id: 'problem-solver-webview',
name: 'WebView Communication Error',
description: 'Test solving webview postMessage errors',
input: {
error: 'Failed to simulate Enter key: TypeError: Cannot read properties of undefined (reading \'postMessage\')',
context: {
component: 'webview',
action: 'postMessage',
extension: 'augment-auto-send',
},
strictMode: true,
},
stepName: 'problem-solver',
category: 'domain-specific' as const,
priority: 'critical' as const,
expectedCompliance: {
complianceScore: 85,
maximsApplied: ['Autonomy', 'Resilience', 'PurityAndCleanliness'],
},
},
],
},
// Suite for the 'improve-prompt' step, focused on guide compliance.
guideCompliance: {
name: 'Guide Compliance Tests',
description: 'Test suite for guide compliance validation',
tests: [
{
id: 'compliance-all-maxims',
name: 'All Maxims Applied',
description: 'Test that all 13 maxims can be applied',
input: {
userPrompt: 'Comprehensively improve the MCP workflow server with all guide maxims',
},
stepName: 'improve-prompt',
category: 'compliance' as const,
priority: 'critical' as const,
expectedCompliance: {
complianceScore: 90,
// The 13 maxims referred to in the description above.
maximsApplied: [
'PrimedCognition',
'AppropriateComplexity',
'FullyUnleashedPotential',
'ClearCommunication',
'PurposefulToolLeveraging',
'ToolAssistedDiagnosis',
'Autonomy',
'PurityAndCleanliness',
'Perceptivity',
'Impenetrability',
'Resilience',
'Consistency',
'OperationalFlexibility',
],
},
},
{
id: 'compliance-heuristics',
name: 'Heuristics Application',
description: 'Test that heuristics are properly applied',
input: {
userPrompt: 'Design a SOLID architecture with SMART goals for responsive UI',
},
stepName: 'improve-prompt',
category: 'compliance' as const,
priority: 'high' as const,
// The only predefined case that states heuristics rather than maxims.
expectedCompliance: {
complianceScore: 80,
heuristicsApplied: ['SOLID', 'SMART', 'Responsive UI'],
},
},
],
},
};
// Module-load side effect: make every predefined suite available on the
// shared runner, keyed by its property name in PREDEFINED_TEST_SUITES.
for (const [suiteKey, suiteDefinition] of Object.entries(PREDEFINED_TEST_SUITES)) {
  globalTestRunner.registerTestSuite(suiteKey, suiteDefinition);
}