/*
 * @gork-labs/secondbrain-mcp
 * Second Brain MCP Server - Agent team orchestration with dynamic tool discovery
 * Package-viewer listing: 717 lines (716 loc), 29.2 kB, JavaScript (version not recorded).
 */
import { logger } from '../utils/logger.js';
import { config } from '../utils/config.js';
/**
 * Sub-agent names for which the file-path and analysis-depth rules are evaluated.
 * Any other sub-agent receives a "not applicable" result from those rules.
 */
const TECHNICAL_SUBAGENTS = ['Security Engineer', 'Software Engineer', 'DevOps Engineer', 'Database Architect', 'Test Engineer'];
/**
 * Sub-agent names for which the code-snippet rule is evaluated.
 * NOTE(review): unlike TECHNICAL_SUBAGENTS this list has no 'Test Engineer';
 * the difference is preserved as found - confirm whether it is intentional.
 */
const CODE_SNIPPET_SUBAGENTS = ['Security Engineer', 'Software Engineer', 'DevOps Engineer', 'Database Architect'];
/**
 * Build the result returned by a specificity rule for a sub-agent it does not target.
 * @returns {{passed: boolean, score: number, feedback: string, severity: string, category: string}}
 */
function notApplicableResult() {
    return { passed: true, score: 100, feedback: 'Not applicable for this chatmode', severity: 'minor', category: 'specificity' };
}
/**
 * Pull the free-text parts out of a response without throwing on missing pieces.
 * @param {object} response - Sub-agent response; `deliverables` may be absent.
 * @returns {{analysis: string, recommendations: Array, fullText: string}}
 *   `fullText` is the analysis followed by a space and the space-joined recommendations.
 */
function extractResponseText(response) {
    const deliverables = response?.deliverables ?? {};
    const rawAnalysis = deliverables.analysis;
    const analysis = rawAnalysis ? String(rawAnalysis) : '';
    const rawRecommendations = deliverables.recommendations;
    let recommendations = [];
    if (Array.isArray(rawRecommendations)) {
        recommendations = rawRecommendations;
    }
    else if (typeof rawRecommendations === 'string' && rawRecommendations.length > 0) {
        // A single recommendation supplied as a bare string counts as one entry.
        recommendations = [rawRecommendations];
    }
    return { analysis, recommendations, fullText: `${analysis} ${recommendations.join(' ')}` };
}
/**
 * Collect the distinct regions of `text` matched by any of `patterns`.
 * Overlapping matches (for example `src/a/b.js`, `src/a/b` and `/a/b.js`, which
 * are all views of one path) are merged into a single region so that one
 * reference is counted once.
 * @param {string} text - Text to scan.
 * @param {RegExp[]} patterns - Global (`g` flag) regular expressions.
 * @returns {Set<string>} Distinct merged match texts.
 */
function collectDistinctMatches(text, patterns) {
    const spans = [];
    for (const pattern of patterns) {
        for (const match of text.matchAll(pattern)) {
            spans.push([match.index, match.index + match[0].length]);
        }
    }
    spans.sort((a, b) => a[0] - b[0] || a[1] - b[1]);
    const merged = [];
    for (const [start, end] of spans) {
        const last = merged[merged.length - 1];
        if (last && start < last[1]) {
            last[1] = Math.max(last[1], end);
        }
        else {
            merged.push([start, end]);
        }
    }
    return new Set(merged.map(([start, end]) => text.slice(start, end)));
}
/**
 * Core quality validation system for sub-agent responses.
 * Runs a list of weighted, rule-based evaluators against a response and
 * combines their results into a single assessment, using a per-sub-agent
 * quality threshold.
 */
export class QualityValidator {
    /** Rules evaluated for every sub-agent; filled by initializeUniversalRules(). */
    universalRules = [];
    /** Extra rules keyed by sub-agent name. Nothing in this class populates it. */
    subagentRules = new Map();
    /** Quality configuration keyed by sub-agent name, plus a 'default' entry. */
    subagentConfigs = new Map();
    constructor() {
        this.initializeUniversalRules();
        this.loadSubagentConfigs();
    }
    /**
     * Validate a sub-agent response using all applicable quality rules.
     *
     * A rule whose evaluator throws is recorded as a failed, zero-score, 'minor'
     * result rather than aborting the validation. If validation itself fails
     * (for example because `context` is missing) a failure assessment is
     * returned instead of rejecting.
     *
     * @param {object} response - Sub-agent response (deliverables, memory_operations, metadata).
     * @param {object} context - Validation context; `subagent` selects rules and threshold.
     * @returns {Promise<object>} Assessment with overallScore, passed, ruleResults, categories,
     *   recommendations, criticalIssues, confidence, processingTime, canRefine and
     *   refinementSuggestions.
     */
    async validateResponse(response, context) {
        const startTime = Date.now();
        try {
            const applicableRules = this.getApplicableRules(context.subagent);
            const subagentConfig = this.getSubagentConfig(context.subagent);
            const ruleResults = [];
            for (const rule of applicableRules) {
                try {
                    // Awaiting is a no-op for synchronous evaluators and lets a rule be async.
                    const result = await rule.evaluator(response, context);
                    result.category = rule.category;
                    ruleResults.push(result);
                }
                catch (error) {
                    logger.warn('Quality rule execution failed', {
                        rule: rule.name,
                        error: error instanceof Error ? error.message : String(error)
                    });
                    // Keep one result per rule so results stay index-aligned with the rules.
                    ruleResults.push({
                        passed: false,
                        score: 0,
                        feedback: `Rule evaluation failed: ${rule.name}`,
                        severity: 'minor',
                        category: rule.category
                    });
                }
            }
            const overallScore = this.calculateWeightedScore(ruleResults, applicableRules);
            const categories = this.categorizeResults(ruleResults);
            const recommendations = this.generateRecommendations(ruleResults, context);
            const criticalIssues = this.extractCriticalIssues(ruleResults);
            const canRefine = this.assessRefinementPotential(ruleResults, context);
            const refinementSuggestions = canRefine ? this.generateRefinementSuggestions(ruleResults) : [];
            const assessment = {
                overallScore,
                passed: overallScore >= subagentConfig.qualityThreshold,
                qualityThreshold: subagentConfig.qualityThreshold,
                ruleResults,
                categories,
                recommendations,
                criticalIssues,
                confidence: this.assessConfidence(ruleResults, overallScore),
                processingTime: Math.max(1, Date.now() - startTime), // Ensure at least 1ms for test consistency
                canRefine,
                refinementSuggestions
            };
            logger.info('Quality validation completed', {
                subagent: context.subagent,
                score: overallScore,
                passed: assessment.passed,
                processingTime: assessment.processingTime,
                ruleCount: ruleResults.length
            });
            return assessment;
        }
        catch (error) {
            // `context` may be the very thing that is missing, so never dereference it blindly here.
            logger.error('Quality validation failed', {
                subagent: context?.subagent,
                error: error instanceof Error ? error.message : String(error)
            });
            return this.createFailureAssessment(context, Date.now() - startTime, error);
        }
    }
    /**
     * Initialize the rule list evaluated for every response.
     *
     * NOTE(review): `applicableToAll` is stored on each rule but is not read by
     * getApplicableRules(); the three 'specificity' rules are therefore run for
     * every sub-agent and decide applicability themselves, returning a passing
     * score of 100 when they do not apply. That score still takes part in the
     * weighted average - confirm this is intended.
     */
    initializeUniversalRules() {
        const rule = (name, category, weight, applicableToAll, evaluator) => ({
            name,
            category,
            weight,
            applicableToAll,
            evaluator: evaluator.bind(this)
        });
        this.universalRules = [
            rule('format_compliance', 'format', 0.2, true, this.evaluateFormatCompliance),
            rule('deliverables_completeness', 'completeness', 0.25, true, this.evaluateDeliverablesCompleteness),
            rule('memory_operations_validity', 'memory', 0.15, true, this.evaluateMemoryOperations),
            rule('task_completion_assessment', 'completion', 0.2, true, this.evaluateTaskCompletion),
            rule('response_quality', 'content', 0.2, true, this.evaluateResponseQuality),
            // Code-specific analysis validation rules
            rule('file_path_specificity', 'specificity', 0.25, false, this.evaluateFilePathSpecificity),
            rule('code_snippet_presence', 'specificity', 0.25, false, this.evaluateCodeSnippetPresence),
            rule('concrete_analysis_depth', 'specificity', 0.3, false, this.evaluateConcreteAnalysisDepth)
        ];
    }
    /**
     * Load the per-sub-agent quality configurations and the 'default' fallback.
     *
     * Each configuration gets its own `specificRules`, `ruleWeights` and
     * `requiredCategories` containers so that mutating one configuration cannot
     * leak into another.
     */
    loadSubagentConfigs() {
        const createConfig = (overrides) => ({
            subagent: 'default',
            qualityThreshold: config.qualityThreshold || 70,
            maxRefinementAttempts: 3,
            specificRules: [],
            ruleWeights: {},
            requiredCategories: ['format', 'completeness'],
            ...overrides
        });
        const specializedConfigs = [
            createConfig({
                subagent: 'Security Engineer',
                qualityThreshold: 80,
                specificRules: ['security_depth', 'threat_analysis', 'compliance_coverage'],
                requiredCategories: ['format', 'completeness', 'security']
            }),
            createConfig({
                subagent: 'Software Architect',
                qualityThreshold: 75,
                specificRules: ['architecture_rationale', 'scalability_consideration', 'pattern_appropriateness'],
                requiredCategories: ['format', 'completeness', 'architecture']
            }),
            createConfig({
                subagent: 'Database Architect',
                qualityThreshold: 75,
                specificRules: ['data_modeling_accuracy', 'performance_implications', 'normalization_quality'],
                requiredCategories: ['format', 'completeness', 'database']
            }),
            createConfig({
                subagent: 'Test Engineer',
                qualityThreshold: 75,
                specificRules: ['test_coverage_analysis', 'edge_case_identification', 'test_strategy_quality'],
                requiredCategories: ['format', 'completeness', 'testing']
            })
        ];
        for (const subagentConfig of specializedConfigs) {
            this.subagentConfigs.set(subagentConfig.subagent, subagentConfig);
        }
        // Fallback used by getSubagentConfig() for any sub-agent not listed above.
        this.subagentConfigs.set('default', createConfig({}));
    }
    /**
     * Get the rules to evaluate for a sub-agent: every universal rule followed by
     * any rules registered for that sub-agent in `subagentRules`.
     * @param {string} subagent - Sub-agent name.
     * @returns {object[]} New array of rule objects.
     */
    getApplicableRules(subagent) {
        const subagentSpecificRules = this.subagentRules.get(subagent) || [];
        return [...this.universalRules, ...subagentSpecificRules];
    }
    /**
     * Get the quality configuration for a sub-agent, falling back to 'default'.
     * @param {string} subagent - Sub-agent name.
     * @returns {object} Quality configuration.
     */
    getSubagentConfig(subagent) {
        return this.subagentConfigs.get(subagent) || this.subagentConfigs.get('default');
    }
    /**
     * Calculate the weighted average score of rule results.
     *
     * `results[i]` is weighted by `rules[i].weight`; entries beyond the shorter
     * of the two arrays are ignored. A non-numeric score or weight counts as 0
     * so a single malformed entry cannot turn the whole score into NaN.
     *
     * @param {object[]} results - Rule results, index-aligned with `rules`.
     * @param {object[]} rules - Rules that produced the results.
     * @returns {number} Rounded weighted average, or 0 when nothing can be weighted.
     */
    calculateWeightedScore(results, rules) {
        if (results.length === 0) {
            return 0;
        }
        const pairCount = Math.min(results.length, rules.length);
        let totalWeightedScore = 0;
        let totalWeight = 0;
        for (let i = 0; i < pairCount; i++) {
            const rawScore = Number(results[i].score);
            const rawWeight = Number(rules[i].weight);
            const score = Number.isFinite(rawScore) ? rawScore : 0;
            const weight = Number.isFinite(rawWeight) ? rawWeight : 0;
            totalWeightedScore += score * weight;
            totalWeight += weight;
        }
        return totalWeight > 0 ? Math.round(totalWeightedScore / totalWeight) : 0;
    }
    /**
     * Average the rule scores per category.
     * @param {object[]} results - Rule results carrying `category` and `score`.
     * @returns {Object<string, number>} Rounded mean score keyed by category.
     */
    categorizeResults(results) {
        const categoryScores = {};
        for (const result of results) {
            if (!categoryScores[result.category]) {
                categoryScores[result.category] = { total: 0, count: 0 };
            }
            categoryScores[result.category].total += result.score;
            categoryScores[result.category].count += 1;
        }
        const categories = {};
        for (const [category, { total, count }] of Object.entries(categoryScores)) {
            categories[category] = Math.round(total / count);
        }
        return categories;
    }
    /**
     * Generate improvement recommendations from the failed rule results.
     * When at least one recommendation exists and the sub-agent has its own
     * configuration, a sub-agent-specific reminder is appended.
     * @param {object[]} results - Rule results.
     * @param {object} context - Validation context (`subagent` is read).
     * @returns {string[]} Recommendations, in rule order.
     */
    generateRecommendations(results, context) {
        const recommendations = results
            .filter((result) => !result.passed && result.feedback)
            .map((result) => result.feedback);
        if (recommendations.length > 0) {
            const subagentConfig = this.getSubagentConfig(context.subagent);
            if (subagentConfig.subagent !== 'default') {
                recommendations.push(`Consider the specific requirements for ${context.subagent} when refining the response.`);
            }
        }
        return recommendations;
    }
    /**
     * Extract the feedback of every failed rule with 'critical' severity.
     * @param {object[]} results - Rule results.
     * @returns {string[]} Feedback messages of critical failures.
     */
    extractCriticalIssues(results) {
        return results
            .filter((result) => !result.passed && result.severity === 'critical')
            .map((result) => result.feedback);
    }
    /**
     * Assess whether refinement would likely improve the response: true when
     * there is at least one failed non-critical rule, or when the weighted
     * score exceeds 80% of the sub-agent's quality threshold.
     * @param {object[]} results - Rule results.
     * @param {object} context - Validation context (`subagent` is read).
     * @returns {boolean}
     */
    assessRefinementPotential(results, context) {
        const hasImprovableIssue = results.some((result) => !result.passed && result.severity !== 'critical');
        const subagentConfig = this.getSubagentConfig(context.subagent);
        const currentScore = this.calculateWeightedScore(results, this.getApplicableRules(context.subagent));
        return hasImprovableIssue || currentScore > subagentConfig.qualityThreshold * 0.8;
    }
    /**
     * Generate one refinement suggestion per failed non-critical rule.
     * @param {object[]} results - Rule results.
     * @returns {string[]} Suggestions of the form "Improve <category>: <feedback>".
     */
    generateRefinementSuggestions(results) {
        return results
            .filter((result) => !result.passed && result.severity !== 'critical')
            .map((result) => `Improve ${result.category}: ${result.feedback}`);
    }
    /**
     * Assess the confidence level of the assessment from the overall score and
     * the share of passed rules.
     * @param {object[]} results - Rule results.
     * @param {number} score - Overall weighted score.
     * @returns {'high'|'medium'|'low'}
     */
    assessConfidence(results, score) {
        const totalRules = results.length;
        if (totalRules === 0) {
            return 'low';
        }
        const passedRules = results.filter((result) => result.passed).length;
        const successRate = passedRules / totalRules;
        if (score >= 85 && successRate >= 0.9) {
            return 'high';
        }
        if (score >= 60 && successRate >= 0.7) {
            return 'medium';
        }
        return 'low';
    }
    /**
     * Create the assessment returned when validation itself fails.
     * @param {object} [context] - Validation context; may be missing, in which case
     *   the default threshold is reported.
     * @param {number} processingTime - Elapsed milliseconds.
     * @param {unknown} error - The error that aborted validation.
     * @returns {object} A zero-score, non-refinable assessment.
     */
    createFailureAssessment(context, processingTime, error) {
        const subagentConfig = this.getSubagentConfig(context?.subagent);
        return {
            overallScore: 0,
            passed: false,
            qualityThreshold: subagentConfig.qualityThreshold,
            ruleResults: [],
            categories: {},
            recommendations: ['Quality validation failed - manual review required'],
            criticalIssues: [`Validation error: ${error instanceof Error ? error.message : String(error)}`],
            confidence: 'low',
            processingTime,
            canRefine: false,
            refinementSuggestions: []
        };
    }
    // Individual quality rule evaluators
    /**
     * Check that the response has the three top-level sections: a `deliverables`
     * object (40 points), a `memory_operations` array (30) and a `metadata`
     * object (30). Passing requires 80, i.e. all three sections.
     * @param {object} response - Sub-agent response.
     * @param {object} context - Unused.
     * @returns {object} Rule result in the 'format' category.
     */
    evaluateFormatCompliance(response, context) {
        try {
            const hasDeliverables = response.deliverables && typeof response.deliverables === 'object';
            const hasMemoryOps = Array.isArray(response.memory_operations);
            const hasMetadata = response.metadata && typeof response.metadata === 'object';
            const structureScore = (hasDeliverables ? 40 : 0) + (hasMemoryOps ? 30 : 0) + (hasMetadata ? 30 : 0);
            const isValid = structureScore >= 80;
            return {
                passed: isValid,
                score: structureScore,
                feedback: isValid ? 'Response format is valid' : 'Response structure is incomplete - missing required sections',
                severity: structureScore < 50 ? 'critical' : 'important',
                category: 'format'
            };
        }
        catch {
            // Reached when `response` itself is null/undefined.
            return {
                passed: false,
                score: 0,
                feedback: 'Response format validation failed',
                severity: 'critical',
                category: 'format'
            };
        }
    }
    /**
     * Check that the expected deliverables are present: an analysis longer than
     * 100 characters (40 points), at least one recommendation (30) and documents
     * (30; granted automatically unless `context.expectedDeliverables` lists
     * 'documents'). A missing `deliverables` section is scored, not thrown on.
     * @param {object} response - Sub-agent response.
     * @param {object} context - Validation context (`expectedDeliverables` is read).
     * @returns {object} Rule result in the 'completeness' category.
     */
    evaluateDeliverablesCompleteness(response, context) {
        const deliverables = response?.deliverables ?? {};
        let score = 0;
        const issues = [];
        if (deliverables.analysis && deliverables.analysis.length > 100) {
            score += 40;
        }
        else {
            issues.push('analysis is missing or too brief');
        }
        if (deliverables.recommendations && deliverables.recommendations.length > 0) {
            score += 30;
        }
        else {
            issues.push('recommendations are missing');
        }
        if (deliverables.documents && deliverables.documents.length > 0) {
            score += 30;
        }
        else if (context?.expectedDeliverables?.includes('documents')) {
            issues.push('expected documents are missing');
        }
        else {
            score += 30; // No documents required
        }
        const feedback = issues.length > 0
            ? `Deliverables incomplete: ${issues.join(', ')}`
            : 'All required deliverables are present';
        return {
            passed: score >= 70,
            score,
            feedback,
            // Only critical when the score is very low (< 40), indicating major structural issues.
            severity: score < 40 ? 'critical' : 'important',
            category: 'completeness'
        };
    }
    /**
     * Score the share of memory operations whose `operation` is a known type.
     * Absent or empty operations are acceptable (score 80); a non-empty value
     * that is not an array fails with score 0.
     * @param {object} response - Sub-agent response.
     * @param {object} context - Unused.
     * @returns {object} Rule result in the 'memory' category.
     */
    evaluateMemoryOperations(response, context) {
        const memoryOps = response?.memory_operations;
        if (!memoryOps || memoryOps.length === 0) {
            return {
                passed: true,
                score: 80,
                feedback: 'No memory operations provided - this is acceptable when no knowledge needs to be captured',
                severity: 'minor',
                category: 'memory'
            };
        }
        if (!Array.isArray(memoryOps)) {
            return {
                passed: false,
                score: 0,
                feedback: 'Memory operations must be provided as an array',
                severity: 'important',
                category: 'memory'
            };
        }
        const validOps = ['create_entities', 'add_observations', 'create_relations', 'delete_entities', 'delete_observations', 'delete_relations'];
        const validCount = memoryOps.filter((op) => validOps.includes(op?.operation)).length;
        const score = Math.min(100, (validCount / memoryOps.length) * 100);
        return {
            passed: score >= 80,
            score,
            feedback: score >= 80 ? 'Memory operations are valid' : 'Some memory operations have invalid types',
            severity: score < 50 ? 'important' : 'minor',
            category: 'memory'
        };
    }
    /**
     * Score the self-reported completion metadata: completion status (up to 50),
     * confidence level (10-30) and a processing time between 0 and 300000
     * exclusive (20). Missing metadata is scored as if every field were absent.
     * @param {object} response - Sub-agent response.
     * @param {object} context - Unused.
     * @returns {object} Rule result in the 'completion' category.
     */
    evaluateTaskCompletion(response, context) {
        const metadata = response?.metadata ?? {};
        let score = 0;
        if (metadata.task_completion_status === 'complete') {
            score += 50;
        }
        else if (metadata.task_completion_status === 'partial') {
            score += 30;
        }
        if (metadata.confidence_level === 'high') {
            score += 30;
        }
        else if (metadata.confidence_level === 'medium') {
            score += 20;
        }
        else {
            score += 10;
        }
        // Explicit radix so a value such as '0x10' is not read as hexadecimal.
        const processingTime = Number.parseInt(metadata.processing_time, 10) || 0;
        if (processingTime > 0 && processingTime < 300000) { // Less than 5 minutes
            score += 20;
        }
        const feedback = score >= 70
            ? 'Task completion indicators are satisfactory'
            : 'Task completion assessment indicates potential issues';
        return {
            passed: score >= 70,
            score,
            feedback,
            severity: score < 40 ? 'important' : 'minor',
            category: 'completion'
        };
    }
    /**
     * Score content quality: analysis length (up to 30), number of
     * recommendations (up to 25) and task alignment (up to 45), where alignment
     * is the share of whitespace-separated requirement words that occur as
     * substrings of the response text. Empty tokens are ignored; when there are
     * no requirement words at all, full alignment credit is given.
     * @param {object} response - Sub-agent response.
     * @param {object} context - Validation context (`requirements` is read).
     * @returns {object} Rule result in the 'content' category.
     */
    evaluateResponseQuality(response, context) {
        const { analysis, recommendations, fullText } = extractResponseText(response);
        let score = 0;
        const issues = [];
        if (analysis.length > 500) {
            score += 30;
        }
        else if (analysis.length > 200) {
            score += 20;
        }
        else {
            issues.push('analysis lacks depth');
        }
        if (recommendations.length >= 3) {
            score += 25;
        }
        else if (recommendations.length >= 1) {
            score += 15;
        }
        else {
            issues.push('insufficient recommendations');
        }
        const requirements = typeof context?.requirements === 'string' ? context.requirements : '';
        // Drop empty tokens: '' is a substring of everything and would inflate the ratio.
        const taskWords = requirements.toLowerCase().split(/\s+/).filter(Boolean);
        const responseText = fullText.toLowerCase();
        const matchedWords = taskWords.filter((word) => responseText.includes(word)).length;
        const alignmentRatio = taskWords.length > 0 ? matchedWords / taskWords.length : 1;
        score += Math.round(alignmentRatio * 45);
        const feedback = issues.length > 0
            ? `Content quality issues: ${issues.join(', ')}`
            : 'Response content meets quality standards';
        return {
            passed: score >= 70,
            score,
            feedback,
            severity: 'important',
            category: 'content'
        };
    }
    // Code-specific analysis validation methods
    /**
     * Score how specifically the response points at files: distinct file paths
     * (50 for three or more, 25 for at least one), any line-number reference
     * (30) and any well-known directory mention (20). Overlapping pattern
     * matches on the same path are counted once.
     * @param {object} response - Sub-agent response.
     * @param {object} context - Validation context (`subagent` is read).
     * @returns {object} Rule result in the 'specificity' category.
     */
    evaluateFilePathSpecificity(response, context) {
        if (!TECHNICAL_SUBAGENTS.includes(context?.subagent)) {
            return notApplicableResult();
        }
        const { fullText } = extractResponseText(response);
        let score = 0;
        const issues = [];
        const filePathPatterns = [
            /[a-zA-Z0-9_-]+\/[a-zA-Z0-9_\/-]+\.[a-zA-Z0-9]+/g, // path/to/file.ext
            /src\/[a-zA-Z0-9_\/-]+/g, // src/ paths
            /config\/[a-zA-Z0-9_\/-]+/g, // config/ paths
            /\.\/[a-zA-Z0-9_\/-]+/g, // relative paths
            /\/[a-zA-Z0-9_\/-]+\.[a-zA-Z0-9]+/g // absolute paths
        ];
        const foundPaths = collectDistinctMatches(fullText, filePathPatterns);
        if (foundPaths.size >= 3) {
            score += 50;
        }
        else if (foundPaths.size >= 1) {
            score += 25;
        }
        else {
            issues.push('no specific file paths found');
        }
        // Non-global patterns: only a yes/no answer is needed, so no lastIndex state.
        const lineNumberPatterns = [
            /lines?\s+\d+/i, // "line 12", "lines 12-20"
            /:\d+/ // file.js:123
        ];
        if (lineNumberPatterns.some((pattern) => pattern.test(fullText))) {
            score += 30;
        }
        else {
            issues.push('no line number references found');
        }
        const directoryPatterns = [
            /src\/[a-zA-Z0-9_-]+/,
            /components?\/[a-zA-Z0-9_-]+/,
            /utils?\/[a-zA-Z0-9_-]+/,
            /services?\/[a-zA-Z0-9_-]+/
        ];
        if (directoryPatterns.some((pattern) => pattern.test(fullText))) {
            score += 20;
        }
        const feedback = issues.length > 0
            ? `File specificity issues: ${issues.join(', ')}. Found ${foundPaths.size} file references.`
            : `Good file specificity: Found ${foundPaths.size} specific file paths`;
        return {
            passed: score >= 50,
            score,
            feedback,
            severity: score < 25 ? 'critical' : 'important',
            category: 'specificity'
        };
    }
    /**
     * Score the presence of code in the response: number of code-like pattern
     * matches (60 for three or more, 30 for at least one) plus 40 when no
     * "generic example" wording is present, or 20 when it is.
     * NOTE(review): the patterns can overlap (a fenced block containing a
     * `const` declaration counts twice); counts are a heuristic.
     * @param {object} response - Sub-agent response.
     * @param {object} context - Validation context (`subagent` is read).
     * @returns {object} Rule result in the 'specificity' category.
     */
    evaluateCodeSnippetPresence(response, context) {
        if (!CODE_SNIPPET_SUBAGENTS.includes(context?.subagent)) {
            return notApplicableResult();
        }
        const { fullText } = extractResponseText(response);
        let score = 0;
        const issues = [];
        const codeBlockPatterns = [
            /```[\s\S]*?```/g, // markdown code blocks
            /`[^`\n]+`/g, // inline code
            /{\s*[\s\S]*?\s*}/g, // code-like braces
            /function\s+[a-zA-Z_][a-zA-Z0-9_]*\s*\(/g, // function definitions
            /class\s+[a-zA-Z_][a-zA-Z0-9_]*/g, // class definitions
            /const\s+[a-zA-Z_][a-zA-Z0-9_]*\s*=/g, // const declarations
            /if\s*\([^)]+\)\s*{/g, // if statements
            /SELECT\s+[\s\S]*FROM/gi, // SQL queries
            /CREATE\s+TABLE/gi // SQL DDL
        ];
        const codeSnippetCount = codeBlockPatterns.reduce((count, pattern) => count + (fullText.match(pattern) || []).length, 0);
        if (codeSnippetCount >= 3) {
            score += 60;
        }
        else if (codeSnippetCount >= 1) {
            score += 30;
        }
        else {
            issues.push('no code snippets found');
        }
        const genericTerms = ['example', 'pseudo', 'sample', 'template', 'placeholder'];
        const lowerText = fullText.toLowerCase();
        const hasGenericTerms = genericTerms.some((term) => lowerText.includes(term));
        if (codeSnippetCount > 0) {
            if (hasGenericTerms) {
                score += 20; // Some credit for examples
                issues.push('appears to use generic examples rather than actual code');
            }
            else {
                score += 40; // Bonus for real code vs examples
            }
        }
        const feedback = issues.length > 0
            ? `Code snippet issues: ${issues.join(', ')}. Found ${codeSnippetCount} code references.`
            : `Good code specificity: Found ${codeSnippetCount} actual code snippets`;
        return {
            passed: score >= 50,
            score,
            feedback,
            severity: score < 30 ? 'critical' : 'important',
            category: 'specificity'
        };
    }
    /**
     * Score how concrete the analysis is: few vague phrases (up to 40), enough
     * specific technical terms (up to 40) and actionable phrasing (up to 20).
     * All term checks are case-insensitive substring matches.
     * @param {object} response - Sub-agent response.
     * @param {object} context - Validation context (`subagent` is read).
     * @returns {object} Rule result in the 'specificity' category.
     */
    evaluateConcreteAnalysisDepth(response, context) {
        if (!TECHNICAL_SUBAGENTS.includes(context?.subagent)) {
            return notApplicableResult();
        }
        const { fullText } = extractResponseText(response);
        const lowerText = fullText.toLowerCase();
        const countOccurring = (terms) => terms.filter((term) => lowerText.includes(term.toLowerCase())).length;
        let score = 0;
        const issues = [];
        const vagueTerms = [
            'security vulnerabilities', 'performance issues', 'best practices',
            'code quality', 'optimization opportunities', 'potential problems',
            'may have', 'could be', 'might contain', 'generally', 'typically',
            'standard practices', 'common issues', 'usual problems'
        ];
        const vagueness = countOccurring(vagueTerms);
        if (vagueness <= 2) {
            score += 40;
        }
        else if (vagueness <= 5) {
            score += 20;
        }
        else {
            issues.push(`too many vague terms used (${vagueness} found)`);
        }
        const specificTerms = [
            'algorithm', 'function', 'method', 'variable', 'parameter',
            'injection', 'XSS', 'CSRF', 'JWT', 'SQL', 'NoSQL',
            'middleware', 'controller', 'service', 'repository',
            'index', 'query', 'schema', 'migration', 'constraint',
            'dependency', 'import', 'export', 'configuration'
        ];
        const specificity = countOccurring(specificTerms);
        if (specificity >= 5) {
            score += 40;
        }
        else if (specificity >= 3) {
            score += 25;
        }
        else {
            issues.push(`insufficient technical specificity (${specificity} technical terms)`);
        }
        const actionableIndicators = [
            'change line', 'modify function', 'update configuration',
            'add validation', 'remove code', 'refactor method',
            'fix query', 'update schema', 'install package'
        ];
        const actionableCount = countOccurring(actionableIndicators);
        if (actionableCount >= 2) {
            score += 20;
        }
        else if (actionableCount >= 1) {
            score += 10;
        }
        else {
            issues.push('lacks actionable recommendations');
        }
        // The denominator is derived from the list so the message cannot drift from it.
        const feedback = issues.length > 0
            ? `Analysis depth issues: ${issues.join(', ')}. Specificity: ${specificity}/${specificTerms.length} technical terms, Vagueness: ${vagueness} vague terms.`
            : `Good analysis depth: ${specificity} technical terms, ${actionableCount} actionable recommendations`;
        return {
            passed: score >= 60,
            score,
            feedback,
            severity: score < 40 ? 'critical' : 'important',
            category: 'specificity'
        };
    }
}