UNPKG

erosolar-cli

Version:

Unified AI agent framework for the command line - Multi-provider support with schema-driven tools, code intelligence, and transparent reasoning

523 lines (509 loc) 15.4 kB
/**
 * AlphaZero-Style Dual Agent Engine
 *
 * Implements self-play concepts for code generation:
 * 1. Dual Response Generation - Generate 2 responses, pick the best
 * 2. Self-Critique Loop - Critique and improve responses
 * 3. Solution Quality Scoring - Multi-dimensional evaluation
 * 4. Tool Pattern Learning - Learn optimal tool sequences
 *
 * Principal Investigator: Bo Shang
 */

/**
 * Default settings for dual-response generation.
 */
export const DEFAULT_DUAL_CONFIG = {
    enabled: true,
    temperatureA: 0.3, // Conservative
    temperatureB: 0.7, // Creative
    minQualityDifference: 0.1,
    timeoutMs: 60000,
};

/**
 * Generates the evaluation prompt for comparing two responses.
 *
 * Each response is truncated to its first 4000 characters before being
 * embedded in the prompt.
 *
 * @param {string} userQuery - The original user request.
 * @param {string} responseA - First candidate response.
 * @param {string} responseB - Second candidate response.
 * @returns {string} Prompt asking for a JSON-only verdict.
 */
export function buildEvaluationPrompt(userQuery, responseA, responseB) {
    return `You are an expert code reviewer evaluating two AI assistant responses.

USER QUERY: ${userQuery}

RESPONSE A:
---
${responseA.slice(0, 4000)}
---

RESPONSE B:
---
${responseB.slice(0, 4000)}
---

Evaluate both responses on these dimensions (0-100 each):
1. Correctness - Is the solution correct and bug-free?
2. Completeness - Does it fully address the user's request?
3. Efficiency - Is the code/approach efficient?
4. Code Quality - Is the code clean, readable, well-structured?
5. Tool Usage - Are tools used appropriately and effectively?

Return JSON only:
{
  "winner": "A" | "B" | "tie",
  "scores": {
    "correctness": { "a": 0-100, "b": 0-100 },
    "completeness": { "a": 0-100, "b": 0-100 },
    "efficiency": { "a": 0-100, "b": 0-100 },
    "codeQuality": { "a": 0-100, "b": 0-100 },
    "toolUsage": { "a": 0-100, "b": 0-100 }
  },
  "reasoning": "brief explanation",
  "confidence": 0-100
}`;
}

/**
 * Parse the evaluation response produced for `buildEvaluationPrompt`.
 *
 * The first `{` through the last `}` of the text is treated as the JSON
 * payload. The quality score of each response is the plain average of its
 * five dimension scores.
 *
 * @param {string} response - Raw model output.
 * @returns {object|null} Parsed scores, or `null` when no JSON object is
 *   found, the JSON is invalid, or any dimension score is missing or is not
 *   a finite number.
 */
export function parseEvaluationResponse(response) {
    const dimensionNames = ['correctness', 'completeness', 'efficiency', 'codeQuality', 'toolUsage'];
    try {
        const match = response.match(/\{[\s\S]*\}/);
        if (!match) {
            return null;
        }
        const parsed = JSON.parse(match[0]);
        const dimensions = {};
        let totalA = 0;
        let totalB = 0;
        for (const name of dimensionNames) {
            const pair = parsed.scores[name];
            // A missing or non-numeric score would otherwise turn the average
            // into NaN (or a concatenated string), so reject the evaluation.
            if (!Number.isFinite(pair.a) || !Number.isFinite(pair.b)) {
                return null;
            }
            totalA += pair.a;
            totalB += pair.b;
            dimensions[name] = pair;
        }
        return {
            qualityScoreA: totalA / dimensionNames.length,
            qualityScoreB: totalB / dimensionNames.length,
            dimensions,
            reasoning: parsed.reasoning,
            confidence: parsed.confidence,
        };
    }
    catch {
        // Malformed output is an expected outcome; callers check for null.
        return null;
    }
}

// ============================================================================
// SELF-CRITIQUE ENGINE
// ============================================================================

/**
 * Generates the self-critique prompt.
 *
 * The response is truncated to its first 6000 characters.
 *
 * @param {string} userQuery - The original user request.
 * @param {string} response - The assistant response to critique.
 * @param {Array<{name: string, success: boolean}>} [toolCalls=[]] - Tools
 *   invoked while producing the response.
 * @returns {string} Prompt asking for a JSON-only list of issues.
 */
export function buildCritiquePrompt(userQuery, response, toolCalls = []) {
    const toolSummary = toolCalls.length > 0
        ? toolCalls.map((t) => `- ${t.name}: ${t.success ? 'success' : 'failed'}`).join('\n')
        : 'No tools used';
    return `You are a critical code reviewer. Analyze this AI assistant response for issues.

USER QUERY: ${userQuery}

RESPONSE:
---
${response.slice(0, 6000)}
---

TOOLS USED:
${toolSummary}

Find issues in these categories:
- correctness: bugs, logic errors, wrong approaches
- completeness: missing features, partial solutions
- efficiency: performance issues, unnecessary operations
- style: code style, readability problems
- security: potential vulnerabilities

Return JSON only:
{
  "issues": [
    {
      "category": "correctness|completeness|efficiency|style|security",
      "severity": "critical|major|minor",
      "description": "what's wrong",
      "suggestion": "how to fix",
      "location": "where in code (if applicable)"
    }
  ],
  "overallQuality": 0-100,
  "needsImprovement": true|false,
  "improvementPriority": ["issue indices in order of importance"]
}`;
}

/**
 * Parse the critique response produced for `buildCritiquePrompt`.
 *
 * @param {string} response - Raw model output.
 * @returns {Array<object>} The `issues` array from the JSON payload, or an
 *   empty array when the payload is missing, invalid, or `issues` is not an
 *   array.
 */
export function parseCritiqueResponse(response) {
    try {
        const match = response.match(/\{[\s\S]*\}/);
        if (!match) {
            return [];
        }
        const parsed = JSON.parse(match[0]);
        // Only hand back real arrays: callers iterate the result directly.
        return Array.isArray(parsed.issues) ? parsed.issues : [];
    }
    catch {
        return [];
    }
}

/**
 * Build the improvement prompt from a list of critique issues.
 *
 * The original response is truncated to its first 4000 characters. An issue
 * without a `severity` is labelled `UNKNOWN` instead of raising.
 *
 * @param {string} userQuery - The original user request.
 * @param {string} originalResponse - The response being improved.
 * @param {Array<object>} issues - Issues as returned by `parseCritiqueResponse`.
 * @returns {string} Prompt asking for an improved response.
 */
export function buildImprovementPrompt(userQuery, originalResponse, issues) {
    const issueList = issues
        .map((issue, idx) => {
            const severity = String(issue.severity ?? 'unknown').toUpperCase();
            const fix = issue.suggestion ? ` → ${issue.suggestion}` : '';
            return `${idx + 1}. [${severity}] ${issue.category}: ${issue.description}${fix}`;
        })
        .join('\n');
    return `Improve your previous response by fixing these issues:

ORIGINAL QUERY: ${userQuery}

ISSUES FOUND:
${issueList}

ORIGINAL RESPONSE:
---
${originalResponse.slice(0, 4000)}
---

Generate an improved response that addresses ALL issues listed above.
Focus especially on critical and major issues.
Maintain what was good about the original response.`;
}

// ============================================================================
// TOOL PATTERN LEARNING
// ============================================================================

/**
 * Tool pattern tracker for learning optimal sequences.
 *
 * Patterns are stored per task type; each pattern records the tool sequence,
 * its running success rate, average duration and number of occurrences.
 */
export class ToolPatternTracker {
    patterns = new Map();
    currentSequence = [];
    currentTaskType = 'general';
    sequenceStartTime = 0;

    /**
     * Start tracking a new task: resets the current sequence and start time.
     *
     * @param {string} taskType - Key under which the pattern is recorded.
     */
    startTask(taskType) {
        this.currentTaskType = taskType;
        this.currentSequence = [];
        this.sequenceStartTime = Date.now();
    }

    /**
     * Record a tool use by appending its name to the current sequence.
     *
     * @param {string} toolName - Name of the tool that was invoked.
     * @param {boolean} success - Accepted for interface compatibility; the
     *   per-call outcome is not stored, only the task outcome is.
     */
    recordToolUse(toolName, success) {
        this.currentSequence.push(toolName);
    }

    /**
     * Complete the current task and record the pattern.
     *
     * Does nothing when no tool use was recorded. Otherwise updates the
     * running averages of the matching pattern (or creates one), re-sorts the
     * task type's patterns by success rate and keeps only the top 10.
     *
     * @param {boolean} success - Whether the task succeeded.
     */
    completeTask(success) {
        if (this.currentSequence.length === 0) {
            return;
        }
        const duration = Date.now() - this.sequenceStartTime;
        const patternKey = this.currentSequence.join('→');
        const outcome = success ? 1 : 0;

        let patterns = this.patterns.get(this.currentTaskType);
        if (!patterns) {
            patterns = [];
            this.patterns.set(this.currentTaskType, patterns);
        }

        const existing = patterns.find((p) => p.toolSequence.join('→') === patternKey);
        if (existing) {
            // Incremental mean: old mean * old count + new sample, over new count.
            const previousCount = existing.occurrences;
            existing.occurrences = previousCount + 1;
            existing.successRate = (existing.successRate * previousCount + outcome) / existing.occurrences;
            existing.avgDuration = (existing.avgDuration * previousCount + duration) / existing.occurrences;
        }
        else {
            patterns.push({
                taskType: this.currentTaskType,
                toolSequence: [...this.currentSequence],
                successRate: outcome,
                avgDuration: duration,
                occurrences: 1,
            });
        }

        // Best success rate first; keep top 10 patterns per task type.
        patterns.sort((a, b) => b.successRate - a.successRate);
        if (patterns.length > 10) {
            patterns.length = 10;
        }
    }

    /**
     * Get the recommended tool sequence for a task type.
     *
     * Prefers the first pattern (in success-rate order) with at least 3
     * occurrences and a success rate of at least 0.7; otherwise falls back to
     * the top-ranked pattern.
     *
     * @param {string} taskType - Task type to look up.
     * @returns {string[]|null} Tool names, or `null` when nothing is known.
     */
    getRecommendedSequence(taskType) {
        const patterns = this.patterns.get(taskType);
        if (!patterns || patterns.length === 0) {
            return null;
        }
        const reliable = patterns.find((p) => p.occurrences >= 3 && p.successRate >= 0.7);
        return reliable?.toolSequence ?? patterns[0]?.toolSequence ?? null;
    }

    /**
     * Get all learned patterns.
     *
     * @returns {Map<string, Array<object>>} Shallow copy of the internal map.
     */
    getAllPatterns() {
        return new Map(this.patterns);
    }

    /**
     * Export patterns for persistence.
     *
     * @returns {Object<string, Array<object>>} Plain object keyed by task
     *   type. Pattern records are copied so later tracker updates do not
     *   change the exported snapshot (and vice versa).
     */
    exportPatterns() {
        const result = {};
        for (const [taskType, patterns] of this.patterns) {
            result[taskType] = patterns.map((p) => ({ ...p, toolSequence: [...p.toolSequence] }));
        }
        return result;
    }

    /**
     * Import patterns from persistence, replacing everything currently held.
     *
     * `null`/`undefined` input clears the tracker. Entries that are not
     * arrays, and pattern records without a `toolSequence` array, are skipped.
     *
     * @param {Object<string, Array<object>>} data - Output of `exportPatterns`.
     */
    importPatterns(data) {
        this.patterns.clear();
        for (const [taskType, stored] of Object.entries(data ?? {})) {
            if (!Array.isArray(stored)) {
                continue;
            }
            const copies = stored
                .filter((p) => p && Array.isArray(p.toolSequence))
                .map((p) => ({ ...p, toolSequence: [...p.toolSequence] }));
            this.patterns.set(taskType, copies);
        }
    }
}

/**
 * Quick heuristic-based quality scoring (no LLM needed).
 *
 * Every dimension starts at 50 and is adjusted by simple text heuristics,
 * then clamped to 0-100. The overall score is a weighted sum
 * (correctness 0.3, completeness 0.25, efficiency 0.2, maintainability 0.15,
 * security 0.1), rounded to an integer.
 *
 * @param {string} response - The assistant response text.
 * @param {Array<{success: boolean}>} [toolCalls=[]] - Tool calls made.
 * @returns {{overall: number, correctness: number, completeness: number,
 *   efficiency: number, maintainability: number, security: number,
 *   breakdown: string}} Scores plus a human-readable breakdown.
 */
export function quickQualityScore(response, toolCalls = []) {
    let correctness = 50;
    let completeness = 50;
    let efficiency = 50;
    let maintainability = 50;
    let security = 50;

    // Tool call success rate affects correctness.
    if (toolCalls.length > 0) {
        const successRate = toolCalls.filter((t) => t.success).length / toolCalls.length;
        correctness = Math.round(50 + successRate * 40);
    }

    // Response length indicates completeness.
    if (response.length > 1000) {
        completeness += 15;
    }
    if (response.length > 3000) {
        completeness += 10;
    }
    if (response.length < 200) {
        completeness -= 20;
    }

    // Code blocks indicate actual implementation (two fences per block).
    const codeBlocks = (response.match(/```/g) || []).length / 2;
    if (codeBlocks >= 1) {
        completeness += 10;
    }
    if (codeBlocks >= 3) {
        completeness += 5;
    }

    // Error handling.
    if (/error|exception|try.*catch/i.test(response)) {
        maintainability += 10;
    }
    // Modern JS.
    if (/\bconst\b|\blet\b/.test(response)) {
        maintainability += 5;
    }
    // Async patterns.
    if (/async|await|Promise/.test(response)) {
        efficiency += 5;
    }

    // Security indicators.
    if (/validate|sanitize|escape/i.test(response)) {
        security += 10;
    }
    // Mentions vulnerabilities without fixing.
    if (/sql\s*injection|xss|csrf/i.test(response)) {
        security -= 10;
    }

    // Bound scores.
    const bound = (n) => Math.max(0, Math.min(100, n));
    correctness = bound(correctness);
    completeness = bound(completeness);
    efficiency = bound(efficiency);
    maintainability = bound(maintainability);
    security = bound(security);

    const overall = Math.round(correctness * 0.3 +
        completeness * 0.25 +
        efficiency * 0.2 +
        maintainability * 0.15 +
        security * 0.1);
    const breakdown = [
        `Correctness: ${correctness}`,
        `Completeness: ${completeness}`,
        `Efficiency: ${efficiency}`,
        `Maintainability: ${maintainability}`,
        `Security: ${security}`,
    ].join(' | ');
    return {
        overall,
        correctness,
        completeness,
        efficiency,
        maintainability,
        security,
        breakdown,
    };
}

// ============================================================================
// TASK TYPE CLASSIFICATION
// ============================================================================

/**
 * Classify task type from user query for pattern matching.
 *
 * The lower-cased query is tested against keyword rules in a fixed order and
 * the first matching rule wins, so e.g. a query containing both "fix" and
 * "add" is classified as `bug-fix`.
 *
 * @param {string} query - The user query.
 * @returns {string} One of `bug-fix`, `feature-add`, `refactor`, `testing`,
 *   `explanation`, `review`, `deployment`, `configuration` or `general`.
 */
export function classifyTaskType(query) {
    const q = query.toLowerCase();
    // Order matters: earlier rules take precedence over later ones.
    const rules = [
        [/\b(bug|fix|error|issue|broken|doesn't work|not working)\b/, 'bug-fix'],
        [/\b(add|create|implement|build|make|new)\b/, 'feature-add'],
        [/\b(refactor|clean|improve|optimize|simplify)\b/, 'refactor'],
        [/\b(test|spec|coverage)\b/, 'testing'],
        [/\b(explain|what|how|why|understand)\b/, 'explanation'],
        [/\b(review|check|analyze|audit)\b/, 'review'],
        [/\b(deploy|release|publish|ship)\b/, 'deployment'],
        [/\b(config|setup|install|configure)\b/, 'configuration'],
    ];
    for (const [pattern, taskType] of rules) {
        if (pattern.test(q)) {
            return taskType;
        }
    }
    return 'general';
}
/**
 * Default settings for the AlphaZero engine.
 */
export const DEFAULT_ALPHA_CONFIG = {
    dualResponseEnabled: true,
    selfCritiqueEnabled: true,
    patternLearningEnabled: true,
    minQualityThreshold: 60,
    maxCritiqueIterations: 2,
};

/**
 * Main AlphaZero Engine coordinating all components.
 *
 * Wraps a `ToolPatternTracker`, the heuristic quality scorer and a set of
 * per-session counters behind one object.
 */
export class AlphaZeroEngine {
    config;
    patternTracker;
    sessionStats;

    /**
     * @param {object} [config={}] - Overrides merged on top of
     *   `DEFAULT_ALPHA_CONFIG`.
     */
    constructor(config = {}) {
        this.config = { ...DEFAULT_ALPHA_CONFIG, ...config };
        this.patternTracker = new ToolPatternTracker();
        this.sessionStats = {
            dualResponsesGenerated: 0,
            critiqueIterations: 0,
            improvementsApplied: 0,
            patternsLearned: 0,
        };
    }

    /**
     * Start tracking a task; the task type is derived from the query text.
     *
     * @param {string} userQuery - The user's request.
     */
    startTask(userQuery) {
        const taskType = classifyTaskType(userQuery);
        this.patternTracker.startTask(taskType);
    }

    /**
     * Record a tool call against the task currently being tracked.
     *
     * @param {string} toolName - Name of the tool.
     * @param {boolean} success - Whether the call succeeded.
     */
    recordToolCall(toolName, success) {
        this.patternTracker.recordToolUse(toolName, success);
    }

    /**
     * Complete the current task.
     *
     * The `patternsLearned` counter is incremented on every successful
     * completion, whether or not the tracker stored a new pattern.
     *
     * @param {boolean} success - Whether the task succeeded.
     */
    completeTask(success) {
        this.patternTracker.completeTask(success);
        if (success) {
            this.sessionStats.patternsLearned++;
        }
    }

    /**
     * Get the recommended tool sequence for a task type.
     *
     * @param {string} taskType - Task type key.
     * @returns {string[]|null} Whatever the pattern tracker recommends.
     */
    getRecommendedTools(taskType) {
        return this.patternTracker.getRecommendedSequence(taskType);
    }

    /**
     * Score a response with the heuristic scorer.
     *
     * @param {string} response - Response text.
     * @param {Array<object>} toolCalls - Tool calls made for the response.
     * @returns {object} Result of `quickQualityScore`.
     */
    scoreResponse(response, toolCalls) {
        return quickQualityScore(response, toolCalls);
    }

    /**
     * Check if a response needs improvement.
     *
     * @param {{overall: number}} score - A score as returned by `scoreResponse`.
     * @returns {boolean} True when `overall` is below `minQualityThreshold`.
     */
    needsImprovement(score) {
        return score.overall < this.config.minQualityThreshold;
    }

    /**
     * Get session statistics.
     *
     * @returns {object} A copy of the counters.
     */
    getStats() {
        return { ...this.sessionStats };
    }

    /**
     * Export learned patterns together with a copy of the session counters.
     *
     * @returns {{patterns: object, stats: object}}
     */
    exportLearning() {
        return {
            patterns: this.patternTracker.exportPatterns(),
            stats: { ...this.sessionStats },
        };
    }

    /**
     * Import learned patterns.
     *
     * Only `data.patterns` is restored; session counters are left untouched.
     * Missing data (`null`/`undefined`) is ignored.
     *
     * @param {{patterns?: object}} data - Output of `exportLearning`.
     */
    importLearning(data) {
        if (data?.patterns) {
            this.patternTracker.importPatterns(data.patterns);
        }
    }

    /**
     * Increment dual response counter.
     */
    recordDualResponse() {
        this.sessionStats.dualResponsesGenerated++;
    }

    /**
     * Increment critique counter.
     */
    recordCritique() {
        this.sessionStats.critiqueIterations++;
    }

    /**
     * Increment improvement counter.
     */
    recordImprovement() {
        this.sessionStats.improvementsApplied++;
    }

    /**
     * Get configuration.
     *
     * @returns {object} A shallow copy of the active configuration.
     */
    getConfig() {
        return { ...this.config };
    }

    /**
     * Update configuration by merging `updates` over the current values.
     *
     * @param {object} updates - Partial configuration.
     */
    updateConfig(updates) {
        this.config = { ...this.config, ...updates };
    }
}

// ============================================================================
// SINGLETON INSTANCE
// ============================================================================

let engineInstance = null;

/**
 * Get the global AlphaZero engine instance, creating it on first use.
 *
 * @returns {AlphaZeroEngine} The shared engine.
 */
export function getAlphaZeroEngine() {
    if (!engineInstance) {
        engineInstance = new AlphaZeroEngine();
    }
    return engineInstance;
}

/**
 * Reset the engine (for testing); the next `getAlphaZeroEngine` call builds
 * a fresh instance.
 */
export function resetAlphaZeroEngine() {
    engineInstance = null;
}
//# sourceMappingURL=alphaZeroEngine.js.map