erosolar-cli
Version:
Unified AI agent framework for the command line - Multi-provider support with schema-driven tools, code intelligence, and transparent reasoning
523 lines (509 loc) • 15.4 kB
JavaScript
/**
* AlphaZero-Style Dual Agent Engine
*
* Implements self-play concepts for code generation:
* 1. Dual Response Generation - Generate 2 responses, pick the best
* 2. Self-Critique Loop - Critique and improve responses
* 3. Solution Quality Scoring - Multi-dimensional evaluation
* 4. Tool Pattern Learning - Learn optimal tool sequences
*
* Principal Investigator: Bo Shang
*/
/**
 * Default settings for dual-response generation (two candidates, A and B).
 *
 * NOTE(review): no code in the visible part of this file reads this object;
 * the per-field notes below are inferred from the field names and the existing
 * inline comments — confirm against the consumer.
 */
export const DEFAULT_DUAL_CONFIG = {
// Presumably the on/off switch for dual generation — TODO confirm.
enabled: true,
temperatureA: 0.3, // Conservative
temperatureB: 0.7, // Creative
// NOTE(review): presumably the minimum score gap needed to prefer one candidate;
// the scale is not established here (scores elsewhere in this file are 0-100) — confirm.
minQualityDifference: 0.1,
// Presumably a timeout in milliseconds, per the name — TODO confirm what it bounds.
timeoutMs: 60000,
};
/**
 * Compose the judge prompt that asks an LLM to compare two candidate responses.
 *
 * Each response is cut to its first 4000 characters before being embedded; the
 * user query is embedded in full. The prompt requests per-dimension 0-100
 * scores for both candidates plus a winner, reasoning and confidence, as JSON.
 *
 * @param {string} userQuery - The request both responses were answering.
 * @param {string} responseA - First candidate response.
 * @param {string} responseB - Second candidate response.
 * @returns {string} The evaluation prompt text.
 */
export function buildEvaluationPrompt(userQuery, responseA, responseB) {
    const MAX_RESPONSE_CHARS = 4000;
    const excerptA = responseA.slice(0, MAX_RESPONSE_CHARS);
    const excerptB = responseB.slice(0, MAX_RESPONSE_CHARS);
    return `You are an expert code reviewer evaluating two AI assistant responses.
USER QUERY:
${userQuery}
RESPONSE A:
---
${excerptA}
---
RESPONSE B:
---
${excerptB}
---
Evaluate both responses on these dimensions (0-100 each):
1. Correctness - Is the solution correct and bug-free?
2. Completeness - Does it fully address the user's request?
3. Efficiency - Is the code/approach efficient?
4. Code Quality - Is the code clean, readable, well-structured?
5. Tool Usage - Are tools used appropriately and effectively?
Return JSON only:
{
"winner": "A" | "B" | "tie",
"scores": {
"correctness": { "a": 0-100, "b": 0-100 },
"completeness": { "a": 0-100, "b": 0-100 },
"efficiency": { "a": 0-100, "b": 0-100 },
"codeQuality": { "a": 0-100, "b": 0-100 },
"toolUsage": { "a": 0-100, "b": 0-100 }
},
"reasoning": "brief explanation",
"confidence": 0-100
}`;
}
/** Dimension keys the evaluation prompt asks the model to score. */
const EVALUATION_DIMENSIONS = Object.freeze([
    'correctness',
    'completeness',
    'efficiency',
    'codeQuality',
    'toolUsage',
]);
/**
 * Coerce one raw score from the evaluator's JSON into a finite number.
 *
 * Numbers are accepted as-is and non-empty numeric strings are converted;
 * anything else (missing, null, boolean, object, NaN, Infinity) is rejected.
 *
 * @param {unknown} raw - Value found at `scores.<dimension>.a` or `.b`.
 * @returns {number|null} The numeric score, or null when unusable.
 */
function toEvaluationScore(raw) {
    if (typeof raw === 'number') {
        return Number.isFinite(raw) ? raw : null;
    }
    if (typeof raw === 'string' && raw.trim() !== '') {
        const value = Number(raw);
        return Number.isFinite(value) ? value : null;
    }
    return null;
}
/**
 * Parse the evaluator LLM's reply into averaged quality scores.
 *
 * The first `{` through the last `}` of the reply is parsed as JSON. Every
 * dimension must carry a usable `a` and `b` score; numeric strings such as
 * "80" are converted so that summing cannot degrade into string concatenation.
 *
 * @param {string} response - Raw text returned by the evaluator.
 * @returns {{qualityScoreA: number, qualityScoreB: number, dimensions: object,
 *   reasoning: *, confidence: *}|null} Parsed result, or null when the reply
 *   contains no JSON object, is malformed, or has a missing/non-numeric score.
 */
export function parseEvaluationResponse(response) {
    try {
        const match = response.match(/\{[\s\S]*\}/);
        if (!match) {
            return null;
        }
        const parsed = JSON.parse(match[0]);
        const dimensions = {};
        let totalA = 0;
        let totalB = 0;
        for (const name of EVALUATION_DIMENSIONS) {
            // A missing `scores` or dimension object throws here and is handled below.
            const a = toEvaluationScore(parsed.scores[name].a);
            const b = toEvaluationScore(parsed.scores[name].b);
            if (a === null || b === null) {
                return null;
            }
            dimensions[name] = { a, b };
            totalA += a;
            totalB += b;
        }
        return {
            qualityScoreA: totalA / EVALUATION_DIMENSIONS.length,
            qualityScoreB: totalB / EVALUATION_DIMENSIONS.length,
            dimensions,
            reasoning: parsed.reasoning,
            confidence: parsed.confidence,
        };
    }
    catch {
        // Best-effort parser: any malformed reply is reported as "no result".
        return null;
    }
}
// ============================================================================
// SELF-CRITIQUE ENGINE
// ============================================================================
/**
 * Compose the self-critique prompt for a single assistant response.
 *
 * The response is cut to its first 6000 characters. Tool calls are listed one
 * per line as `- <name>: success|failed`; an empty list is rendered as
 * "No tools used".
 *
 * @param {string} userQuery - The request the response was answering.
 * @param {string} response - The assistant response to critique.
 * @param {Array<{name: string, success: boolean}>} toolCalls - Tools invoked while answering.
 * @returns {string} The critique prompt text.
 */
export function buildCritiquePrompt(userQuery, response, toolCalls) {
    let toolSummary = 'No tools used';
    if (toolCalls.length > 0) {
        const lines = [];
        for (const call of toolCalls) {
            const outcome = call.success ? 'success' : 'failed';
            lines.push(`- ${call.name}: ${outcome}`);
        }
        toolSummary = lines.join('\n');
    }
    const excerpt = response.slice(0, 6000);
    return `You are a critical code reviewer. Analyze this AI assistant response for issues.
USER QUERY:
${userQuery}
RESPONSE:
---
${excerpt}
---
TOOLS USED:
${toolSummary}
Find issues in these categories:
- correctness: bugs, logic errors, wrong approaches
- completeness: missing features, partial solutions
- efficiency: performance issues, unnecessary operations
- style: code style, readability problems
- security: potential vulnerabilities
Return JSON only:
{
"issues": [
{
"category": "correctness|completeness|efficiency|style|security",
"severity": "critical|major|minor",
"description": "what's wrong",
"suggestion": "how to fix",
"location": "where in code (if applicable)"
}
],
"overallQuality": 0-100,
"needsImprovement": true|false,
"improvementPriority": ["issue indices in order of importance"]
}`;
}
/**
 * Parse the critique LLM's reply into a list of issue objects.
 *
 * The first `{` through the last `}` of the reply is parsed as JSON and its
 * `issues` property is returned. Because the JSON comes from a model, the
 * result is normalised: a non-array `issues` value is treated as "no issues",
 * and entries that are not plain objects are dropped so that consumers can
 * safely read `severity`, `category`, etc. from every element.
 *
 * @param {string} response - Raw text returned by the critique model.
 * @returns {object[]} Issue objects; empty when none could be extracted.
 */
export function parseCritiqueResponse(response) {
    try {
        const match = response.match(/\{[\s\S]*\}/);
        if (!match) {
            return [];
        }
        const { issues } = JSON.parse(match[0]);
        if (!Array.isArray(issues)) {
            return [];
        }
        return issues.filter((entry) => entry !== null && typeof entry === 'object' && !Array.isArray(entry));
    }
    catch {
        // Best-effort parser: a malformed reply simply yields no issues.
        return [];
    }
}
/**
 * Compose the prompt asking the model to revise a response based on a critique.
 *
 * Issues are rendered as a numbered list:
 * `N. [SEVERITY] category: description → suggestion` (the arrow part only when
 * a suggestion is present). Issue objects originate from model-generated JSON,
 * so absent fields fall back to placeholders instead of throwing; a non-array
 * `issues` argument is rendered as an empty list. The original response is cut
 * to its first 4000 characters.
 *
 * @param {string} userQuery - The original user request.
 * @param {string} originalResponse - The response to be improved.
 * @param {Array<{severity?: string, category?: string, description?: string,
 *   suggestion?: string}>} issues - Critique findings to address.
 * @returns {string} The improvement prompt text.
 */
export function buildImprovementPrompt(userQuery, originalResponse, issues) {
    const issueList = (Array.isArray(issues) ? issues : [])
        .map((issue, idx) => {
            const severity = String(issue?.severity ?? 'unspecified').toUpperCase();
            const category = issue?.category ?? 'uncategorized';
            const description = issue?.description ?? '(no description)';
            const suggestion = issue?.suggestion ? ` → ${issue.suggestion}` : '';
            return `${idx + 1}. [${severity}] ${category}: ${description}${suggestion}`;
        })
        .join('\n');
    return `Improve your previous response by fixing these issues:
ORIGINAL QUERY:
${userQuery}
ISSUES FOUND:
${issueList}
ORIGINAL RESPONSE:
---
${originalResponse.slice(0, 4000)}
---
Generate an improved response that addresses ALL issues listed above.
Focus especially on critical and major issues.
Maintain what was good about the original response.`;
}
// ============================================================================
// TOOL PATTERN LEARNING
// ============================================================================
/** Copy one learned pattern so the tracker and its callers never share arrays. */
function cloneToolPattern(pattern) {
    return { ...pattern, toolSequence: [...pattern.toolSequence] };
}
/** True when a value has the minimum shape the tracker relies on (an object with a `toolSequence` array). */
function isToolPattern(value) {
    return value !== null && typeof value === 'object' && Array.isArray(value.toolSequence);
}
/**
 * Tracks which tool sequences were used for each task type and how often they
 * succeeded, keeping at most ten patterns per task type ordered by success rate.
 *
 * Usage: `startTask(type)`, then `recordToolUse(name, ok)` once per tool call,
 * then `completeTask(ok)`. All data handed out by, or taken in by, this class
 * is copied, so external mutation cannot corrupt the learned statistics and
 * the tracker never mutates caller-owned objects.
 */
export class ToolPatternTracker {
    patterns = new Map();
    currentSequence = [];
    currentTaskType = 'general';
    sequenceStartTime = 0;
    /**
     * Begin tracking a new task, discarding any unfinished sequence.
     * @param {string} taskType - Key under which the resulting pattern is stored.
     */
    startTask(taskType) {
        this.currentTaskType = taskType;
        this.currentSequence = [];
        this.sequenceStartTime = Date.now();
    }
    /**
     * Append a tool to the current sequence.
     * @param {string} toolName - Name of the tool that was invoked.
     * @param {boolean} success - Accepted for interface compatibility; per-call
     *   outcomes are not tracked, only the task outcome given to `completeTask`.
     */
    recordToolUse(toolName, success) {
        // Without a preceding startTask the clock would still be at 0 and the
        // recorded duration would be measured from the Unix epoch.
        if (this.currentSequence.length === 0 && this.sequenceStartTime === 0) {
            this.sequenceStartTime = Date.now();
        }
        this.currentSequence.push(toolName);
    }
    /**
     * Finish the current task and fold its tool sequence into the statistics.
     * Does nothing when no tool was recorded. The sequence is cleared afterwards
     * so a repeated call cannot count the same task twice.
     * @param {boolean} success - Whether the task as a whole succeeded.
     */
    completeTask(success) {
        if (this.currentSequence.length === 0) {
            return;
        }
        const sequence = this.currentSequence;
        const duration = Date.now() - this.sequenceStartTime;
        const outcome = success ? 1 : 0;
        let patterns = this.patterns.get(this.currentTaskType);
        if (!patterns) {
            patterns = [];
            this.patterns.set(this.currentTaskType, patterns);
        }
        // Element-wise comparison: tool names containing a separator character cannot collide.
        const existing = patterns.find((p) => p.toolSequence.length === sequence.length
            && p.toolSequence.every((tool, i) => tool === sequence[i]));
        if (existing) {
            // Incremental running averages over all occurrences.
            existing.occurrences++;
            existing.successRate = (existing.successRate * (existing.occurrences - 1) + outcome) / existing.occurrences;
            existing.avgDuration = (existing.avgDuration * (existing.occurrences - 1) + duration) / existing.occurrences;
        }
        else {
            patterns.push({
                taskType: this.currentTaskType,
                toolSequence: [...sequence],
                successRate: outcome,
                avgDuration: duration,
                occurrences: 1,
            });
        }
        // Best success rate first; only the top 10 per task type are retained.
        patterns.sort((a, b) => b.successRate - a.successRate);
        if (patterns.length > 10) {
            patterns.length = 10;
        }
        this.currentSequence = [];
        this.sequenceStartTime = 0;
    }
    /**
     * Recommend a tool sequence for a task type.
     * Prefers the highest-ranked pattern seen at least 3 times with a success
     * rate of at least 0.7; otherwise falls back to the highest-ranked pattern.
     * @param {string} taskType - Task type to look up.
     * @returns {string[]|null} A copy of the sequence, or null when nothing is known.
     */
    getRecommendedSequence(taskType) {
        const patterns = this.patterns.get(taskType);
        if (!patterns || patterns.length === 0) {
            return null;
        }
        const reliable = patterns.find((p) => p.occurrences >= 3 && p.successRate >= 0.7);
        const sequence = (reliable ?? patterns[0])?.toolSequence;
        return Array.isArray(sequence) ? [...sequence] : null;
    }
    /**
     * @returns {Map<string, object[]>} A deep-enough copy of every learned pattern, keyed by task type.
     */
    getAllPatterns() {
        const copy = new Map();
        for (const [taskType, list] of this.patterns) {
            copy.set(taskType, list.map(cloneToolPattern));
        }
        return copy;
    }
    /**
     * Export patterns as a plain object suitable for persistence.
     * @returns {Object<string, object[]>} Task type → copied pattern list.
     */
    exportPatterns() {
        return Object.fromEntries([...this.patterns].map(([taskType, list]) => [taskType, list.map(cloneToolPattern)]));
    }
    /**
     * Replace all learned patterns with previously exported data.
     * Existing patterns are always cleared. Values that are not arrays, and
     * entries without a `toolSequence` array, are skipped.
     * @param {Object<string, object[]>|null|undefined} data - Output of `exportPatterns`.
     */
    importPatterns(data) {
        this.patterns.clear();
        if (data === null || typeof data !== 'object') {
            return;
        }
        for (const [taskType, list] of Object.entries(data)) {
            if (!Array.isArray(list)) {
                continue;
            }
            this.patterns.set(taskType, list.filter(isToolPattern).map(cloneToolPattern));
        }
    }
}
/**
 * Quick heuristic quality score for a response (no LLM call).
 *
 * Every dimension starts at 50 and is adjusted by surface signals:
 * - correctness: 50 + 40 × tool-call success rate (when any tools were used)
 * - completeness: response length and number of fenced code blocks
 * - maintainability: mentions of error handling (including try/catch spanning
 *   several lines) and `const`/`let`
 * - efficiency: mentions of async/await/Promise
 * - security: +10 for validate/sanitize/escape, −10 for naming SQL injection/XSS/CSRF
 * Scores are clamped to 0-100 and combined with weights 0.3/0.25/0.2/0.15/0.1.
 *
 * @param {string} response - Response text; null/undefined is treated as empty.
 * @param {Array<{success: boolean}>} [toolCalls] - Tool calls made; a missing
 *   or non-array value is treated as no tool calls.
 * @returns {{overall: number, correctness: number, completeness: number,
 *   efficiency: number, maintainability: number, security: number,
 *   breakdown: string}} Per-dimension scores, weighted overall, and a summary line.
 */
export function quickQualityScore(response, toolCalls) {
    const text = typeof response === 'string' ? response : String(response ?? '');
    const calls = Array.isArray(toolCalls) ? toolCalls : [];
    let correctness = 50;
    let completeness = 50;
    let efficiency = 50;
    let maintainability = 50;
    let security = 50;
    // Tool call success rate affects correctness
    if (calls.length > 0) {
        const successRate = calls.filter((call) => call.success).length / calls.length;
        correctness = Math.round(50 + successRate * 40);
    }
    // Response length indicates completeness
    if (text.length > 1000) {
        completeness += 15;
    }
    if (text.length > 3000) {
        completeness += 10;
    }
    if (text.length < 200) {
        completeness -= 20;
    }
    // Two fences make one code block; an unpaired fence does not count.
    const codeBlocks = Math.floor((text.match(/```/g) ?? []).length / 2);
    if (codeBlocks >= 1) {
        completeness += 10;
    }
    if (codeBlocks >= 3) {
        completeness += 5;
    }
    // `[\s\S]` instead of `.` so a try { ... } catch spread over several lines is detected.
    if (/error|exception|try[\s\S]*catch/i.test(text)) {
        maintainability += 10; // Error handling
    }
    if (/\bconst\b|\blet\b/.test(text)) {
        maintainability += 5; // Modern JS
    }
    if (/async|await|Promise/.test(text)) {
        efficiency += 5; // Async patterns
    }
    // Security indicators
    if (/validate|sanitize|escape/i.test(text)) {
        security += 10;
    }
    if (/sql\s*injection|xss|csrf/i.test(text)) {
        security -= 10; // Mentions vulnerabilities without fixing
    }
    const bound = (n) => Math.max(0, Math.min(100, n));
    correctness = bound(correctness);
    completeness = bound(completeness);
    efficiency = bound(efficiency);
    maintainability = bound(maintainability);
    security = bound(security);
    const overall = Math.round(correctness * 0.3 +
        completeness * 0.25 +
        efficiency * 0.2 +
        maintainability * 0.15 +
        security * 0.1);
    const breakdown = [
        `Correctness: ${correctness}`,
        `Completeness: ${completeness}`,
        `Efficiency: ${efficiency}`,
        `Maintainability: ${maintainability}`,
        `Security: ${security}`,
    ].join(' | ');
    return {
        overall,
        correctness,
        completeness,
        efficiency,
        maintainability,
        security,
        breakdown,
    };
}
// ============================================================================
// TASK TYPE CLASSIFICATION
// ============================================================================
/**
 * Ordered classification rules: the first pattern that matches wins, so earlier
 * entries take priority (e.g. a query mentioning both "fix" and "test" is a
 * bug-fix). Each keyword also accepts its common inflections ("tests",
 * "errors", "refactoring", "configuration", ...), which a bare `\bword\b`
 * would reject.
 */
const TASK_TYPE_RULES = Object.freeze([
    ['bug-fix', /\b(bugs?|fix(es|ed|ing)?|errors?|issues?|broken|doesn['’]t work|not working)\b/],
    ['feature-add', /\b(add(s|ed|ing)?|creat(e|es|ed|ing)|implement(s|ed|ing)?|buil(d|ds|ding|t)|mak(e|es|ing)|new)\b/],
    ['refactor', /\b(refactor(s|ed|ing)?|clean(s|ed|ing|up)?|improv(e|es|ed|ing|ement|ements)|optimi[sz](e|es|ed|ing|ation)|simplif(y|ies|ied|ying))\b/],
    ['testing', /\b(test(s|ed|ing)?|specs?|coverage)\b/],
    ['explanation', /\b(explain(s|ed|ing)?|what|how|why|understand(s|ing)?)\b/],
    ['review', /\b(review(s|ed|ing)?|check(s|ed|ing)?|analy[sz](e|es|ed|ing)|audit(s|ed|ing)?)\b/],
    ['deployment', /\b(deploy(s|ed|ing|ment)?|releas(e|es|ed|ing)|publish(es|ed|ing)?|ship(s|ped|ping)?)\b/],
    ['configuration', /\b(config(s|ure|ures|ured|uring|uration)?|setup|install(s|ed|ing|ation)?)\b/],
]);
/**
 * Classify a user query into a coarse task type used as the key for
 * tool-pattern learning.
 *
 * Matching is case-insensitive keyword matching in priority order:
 * bug-fix, feature-add, refactor, testing, explanation, review, deployment,
 * configuration; anything else is 'general'.
 *
 * @param {string} query - The user's request; null/undefined is treated as empty.
 * @returns {string} One of the task types above, or 'general'.
 */
export function classifyTaskType(query) {
    const q = String(query ?? '').toLowerCase();
    for (const [taskType, pattern] of TASK_TYPE_RULES) {
        if (pattern.test(q)) {
            return taskType;
        }
    }
    return 'general';
}
/**
 * Default engine configuration; the AlphaZeroEngine constructor spreads
 * caller-supplied overrides on top of these values.
 */
export const DEFAULT_ALPHA_CONFIG = {
// NOTE(review): the three *Enabled flags and maxCritiqueIterations are not read
// anywhere in the visible part of this file; presumably consumed by callers — confirm.
dualResponseEnabled: true,
selfCritiqueEnabled: true,
patternLearningEnabled: true,
// AlphaZeroEngine.needsImprovement reports true when a score's `overall` is below this value.
minQualityThreshold: 60,
maxCritiqueIterations: 2,
};
/**
 * Facade coordinating task classification, tool-pattern tracking, heuristic
 * scoring and per-session counters.
 *
 * Holds a merged configuration (`DEFAULT_ALPHA_CONFIG` overridden by the
 * constructor argument), one `ToolPatternTracker`, and four session counters:
 * dualResponsesGenerated, critiqueIterations, improvementsApplied and
 * patternsLearned.
 */
export class AlphaZeroEngine {
    config;
    patternTracker;
    sessionStats;
    /**
     * @param {object} [config] - Partial overrides for `DEFAULT_ALPHA_CONFIG`.
     */
    constructor(config = {}) {
        this.config = { ...DEFAULT_ALPHA_CONFIG, ...config };
        this.patternTracker = new ToolPatternTracker();
        this.sessionStats = {
            dualResponsesGenerated: 0,
            critiqueIterations: 0,
            improvementsApplied: 0,
            patternsLearned: 0,
        };
    }
    /**
     * Classify the query and start tracking tool usage for it.
     * @param {string} userQuery - The user's request.
     */
    startTask(userQuery) {
        this.patternTracker.startTask(classifyTaskType(userQuery));
    }
    /**
     * Record one tool call against the current task.
     * @param {string} toolName - Name of the tool.
     * @param {boolean} success - Whether the call succeeded.
     */
    recordToolCall(toolName, success) {
        this.patternTracker.recordToolUse(toolName, success);
    }
    /**
     * Complete the current task and update the learned-pattern counter.
     *
     * The tracker ignores a completion when no tool call was recorded, so the
     * counter is only incremented when there was a sequence for it to record
     * and the task succeeded.
     * @param {boolean} success - Whether the task succeeded.
     */
    completeTask(success) {
        // Must be read before delegating: the tracker may clear its sequence on completion.
        const hadToolCalls = this.patternTracker.currentSequence.length > 0;
        this.patternTracker.completeTask(success);
        if (success && hadToolCalls) {
            this.sessionStats.patternsLearned++;
        }
    }
    /**
     * @param {string} taskType - Task type key (as produced by `classifyTaskType`).
     * @returns {string[]|null} Recommended tool sequence, or null when none is known.
     */
    getRecommendedTools(taskType) {
        return this.patternTracker.getRecommendedSequence(taskType);
    }
    /**
     * Score a response with the heuristic scorer.
     * @param {string} response - Response text.
     * @param {Array<{success: boolean}>} toolCalls - Tool calls made for the response.
     * @returns {object} Result of `quickQualityScore`.
     */
    scoreResponse(response, toolCalls) {
        return quickQualityScore(response, toolCalls);
    }
    /**
     * @param {{overall: number}} score - A score as returned by `scoreResponse`.
     * @returns {boolean} True when `score.overall` is below `config.minQualityThreshold`.
     */
    needsImprovement(score) {
        return score.overall < this.config.minQualityThreshold;
    }
    /**
     * @returns {object} A copy of the session counters.
     */
    getStats() {
        return { ...this.sessionStats };
    }
    /**
     * @returns {{patterns: object, stats: object}} Exported tracker patterns and a copy of the counters.
     */
    exportLearning() {
        return {
            patterns: this.patternTracker.exportPatterns(),
            stats: { ...this.sessionStats },
        };
    }
    /**
     * Restore tracker patterns from `exportLearning` output. Counters are not
     * restored. Missing data (null/undefined, or no `patterns`) is ignored.
     * @param {{patterns?: object}|null|undefined} data - Previously exported learning data.
     */
    importLearning(data) {
        if (data?.patterns) {
            this.patternTracker.importPatterns(data.patterns);
        }
    }
    /** Increment the dual-response counter. */
    recordDualResponse() {
        this.sessionStats.dualResponsesGenerated++;
    }
    /** Increment the critique-iteration counter. */
    recordCritique() {
        this.sessionStats.critiqueIterations++;
    }
    /** Increment the applied-improvement counter. */
    recordImprovement() {
        this.sessionStats.improvementsApplied++;
    }
    /**
     * @returns {object} A shallow copy of the active configuration.
     */
    getConfig() {
        return { ...this.config };
    }
    /**
     * Merge partial updates into the active configuration.
     * @param {object} updates - Keys to override.
     */
    updateConfig(updates) {
        this.config = { ...this.config, ...updates };
    }
}
// ============================================================================
// SINGLETON INSTANCE
// ============================================================================
// Lazily created, module-wide engine; null until first requested or after a reset.
let engineInstance = null;
/**
 * Return the shared AlphaZero engine, constructing it with default
 * configuration on first use.
 * @returns {AlphaZeroEngine} The module-wide engine instance.
 */
export function getAlphaZeroEngine() {
    engineInstance ??= new AlphaZeroEngine();
    return engineInstance;
}
/**
 * Drop the shared engine so the next `getAlphaZeroEngine()` call builds a
 * fresh one (for testing).
 */
export function resetAlphaZeroEngine() {
    engineInstance = null;
}
//# sourceMappingURL=alphaZeroEngine.js.map