UNPKG

@stackmemoryai/stackmemory

Version:

Project-scoped memory for AI coding tools. Durable context across sessions with MCP integration, frames, smart retrieval, Claude Code skills, and automatic hooks.

612 lines (514 loc) 16.9 kB
#!/usr/bin/env node
/**
 * GEPA Optimizer
 *
 * Genetic Eval-driven Prompt Algorithm for optimizing CLAUDE.md
 *
 * Usage:
 *   node optimize.js init              # Initialize with current CLAUDE.md
 *   node optimize.js mutate            # Generate new variants
 *   node optimize.js eval [variant]    # Run evals on variant(s)
 *   node optimize.js score             # Score all variants in generation
 *   node optimize.js select            # Select best, advance generation
 *   node optimize.js run [generations] # Full optimization loop
 *   node optimize.js status            # Show current status
 *   node optimize.js diff [a] [b]      # Compare two variants
 *
 * Note: `score` and `select` both dispatch to scoreAndSelect().
 */

import fs from 'fs';
import os from 'os';
import path from 'path';
import { fileURLToPath } from 'url';
import { spawn, execSync, execFileSync, spawnSync } from 'child_process';

const __dirname = path.dirname(fileURLToPath(import.meta.url));

// Configuration
const CONFIG_PATH = path.join(__dirname, 'config.json');
const config = JSON.parse(fs.readFileSync(CONFIG_PATH, 'utf8'));

const GEPA_DIR = process.env.GEPA_DIR || __dirname;
const GENERATIONS_DIR = path.join(GEPA_DIR, 'generations');
const RESULTS_DIR = path.join(GEPA_DIR, 'results');
const EVALS_DIR = path.join(GEPA_DIR, 'evals');

// Upper bound on captured child-process output (10 MiB).
const MAX_OUTPUT_BYTES = 10 * 1024 * 1024;

// Ensure directories
[GENERATIONS_DIR, RESULTS_DIR, EVALS_DIR].forEach((dir) => {
  if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true });
});

/**
 * Split text into trimmed, non-empty lines.
 *
 * Used for JSONL files so that an empty file, a trailing newline or a blank
 * separator line does not reach JSON.parse and throw.
 *
 * @param {string} text - Raw file contents.
 * @returns {string[]} The non-blank lines, in order.
 */
function nonEmptyLines(text) {
  return text
    .split('\n')
    .map((line) => line.trim())
    .filter((line) => line.length > 0);
}

/**
 * State management
 *
 * Reads state.json from GEPA_DIR, or returns a fresh default state when the
 * file does not exist yet.
 *
 * @returns {object} The persisted (or default) optimizer state.
 */
function getState() {
  const statePath = path.join(GEPA_DIR, 'state.json');
  if (fs.existsSync(statePath)) {
    return JSON.parse(fs.readFileSync(statePath, 'utf8'));
  }
  return {
    currentGeneration: 0,
    bestVariant: null,
    bestScore: 0,
    history: [],
  };
}

/**
 * Persist the optimizer state to GEPA_DIR/state.json (pretty-printed).
 *
 * @param {object} state - State object to serialize.
 */
function saveState(state) {
  fs.writeFileSync(
    path.join(GEPA_DIR, 'state.json'),
    JSON.stringify(state, null, 2)
  );
}

/**
 * Get path for a generation/variant
 *
 * Side effect: creates the generation directory if it does not exist.
 *
 * @param {number} gen - Generation number (zero-padded to 3 digits on disk).
 * @param {string|null} [variant] - Variant name without the `.md` extension.
 * @returns {string} Path to `<variant>.md`, or to the generation directory
 *   when no variant is given.
 */
function getGenPath(gen, variant = null) {
  const genDir = path.join(
    GENERATIONS_DIR,
    `gen-${String(gen).padStart(3, '0')}`
  );
  if (!fs.existsSync(genDir)) fs.mkdirSync(genDir, { recursive: true });
  return variant ? path.join(genDir, `${variant}.md`) : genDir;
}

/**
 * Initialize GEPA with current CLAUDE.md
 *
 * Copies the target file to generation 0 as `baseline` and resets state.
 * Exits the process with code 1 when the target file is missing.
 *
 * @param {string} [targetPath] - File to optimize; defaults to CLAUDE.md in
 *   the current working directory.
 */
async function init(targetPath) {
  const claudeMdPath = targetPath || path.join(process.cwd(), 'CLAUDE.md');

  if (!fs.existsSync(claudeMdPath)) {
    console.error(`Error: ${claudeMdPath} not found`);
    process.exit(1);
  }

  const content = fs.readFileSync(claudeMdPath, 'utf8');
  const genPath = getGenPath(0, 'baseline');
  fs.writeFileSync(genPath, content);

  const state = {
    currentGeneration: 0,
    bestVariant: 'baseline',
    bestScore: 0,
    targetPath: claudeMdPath,
    history: [
      {
        generation: 0,
        variant: 'baseline',
        action: 'init',
        timestamp: new Date().toISOString(),
      },
    ],
  };
  saveState(state);

  console.log(`Initialized GEPA with ${claudeMdPath}`);
  console.log(`Baseline saved to ${genPath}`);
}

/**
 * Generate mutations of the current best variant
 *
 * Writes `config.evolution.populationSize` variants plus a copy of the
 * current best (named `baseline`) into the next generation directory and
 * records the action in the state history.
 *
 * @returns {Promise<Array<{name: string, strategy: string, path: string}>>}
 * @throws {Error} When no best variant is recorded (init has not been run).
 */
async function mutate() {
  const state = getState();
  if (!state.bestVariant) {
    // Without this guard getGenPath() returns a directory and readFileSync
    // fails with an opaque EISDIR.
    throw new Error("No baseline recorded. Run 'init' first.");
  }

  const nextGen = state.currentGeneration + 1;
  const currentBest = fs.readFileSync(
    getGenPath(state.currentGeneration, state.bestVariant),
    'utf8'
  );

  console.log(
    `Generating ${config.evolution.populationSize} variants for generation ${nextGen}...`
  );

  const mutations = config.evolution.mutationStrategies;
  const variants = [];

  for (let i = 0; i < config.evolution.populationSize; i++) {
    // Strategies are assigned round-robin across the population.
    const strategy = mutations[i % mutations.length];
    const variantName = `variant-${String.fromCharCode(97 + i)}`; // a, b, c, d...

    console.log(`  Creating ${variantName} using strategy: ${strategy}`);

    const mutatedContent = await generateMutation(currentBest, strategy, state);
    const variantPath = getGenPath(nextGen, variantName);
    fs.writeFileSync(variantPath, mutatedContent);

    variants.push({ name: variantName, strategy, path: variantPath });
  }

  // Also copy baseline for comparison
  fs.writeFileSync(getGenPath(nextGen, 'baseline'), currentBest);

  state.history.push({
    generation: nextGen,
    action: 'mutate',
    variants: variants.map((v) => v.name),
    timestamp: new Date().toISOString(),
  });
  saveState(state);

  console.log(
    `\nGenerated ${variants.length} variants in gen-${String(nextGen).padStart(3, '0')}/`
  );
  return variants;
}

/**
 * Generate a mutation using AI
 *
 * @param {string} content - Markdown of the variant being mutated.
 * @param {string} strategy - Key into the strategy prompt table; an unknown
 *   key contributes no extra guidance text.
 * @param {object} state - Optimizer state, forwarded to getRecentFeedback().
 * @returns {Promise<string>} The trimmed model output.
 */
async function generateMutation(content, strategy, state) {
  const strategyPrompts = {
    rephrase: `Rephrase instructions for clarity without changing meaning. Make them more direct and actionable.`,
    add_examples: `Add concrete examples where instructions are abstract. Use <example> tags for code examples.`,
    remove_redundancy: `Remove redundant or repetitive instructions. Consolidate similar rules. Keep it DRY.`,
    restructure: `Reorganize sections for better flow. Group related instructions. Improve hierarchy.`,
    add_constraints: `Add specific constraints and guardrails based on common failure modes. Be precise about what NOT to do.`,
    simplify: `Simplify complex instructions. Break down multi-step rules. Use bullet points over paragraphs.`,
  };

  // Avoid interpolating the literal text "undefined" for unknown strategies.
  const strategyGuidance = strategyPrompts[strategy] ?? '';

  const prompt = `You are optimizing a CLAUDE.md system prompt for an AI coding agent.

CURRENT PROMPT:
\`\`\`markdown
${content}
\`\`\`

OPTIMIZATION STRATEGY: ${strategy}
${strategyGuidance}

EVALUATION FEEDBACK FROM PREVIOUS GENERATIONS:
${getRecentFeedback(state)}

REQUIREMENTS:
1. Output ONLY the improved markdown content
2. Preserve all critical instructions and constraints
3. Keep the same overall structure unless restructuring
4. Do not add commentary or explanations
5. Target <8000 tokens total length

OUTPUT THE IMPROVED CLAUDE.MD:`;

  // Use Claude to generate mutation
  const result = await callClaude(prompt);
  return result.trim();
}

/**
 * Get recent evaluation feedback for context
 *
 * Summarizes the last 20 records of results/scores.jsonl per variant.
 *
 * @param {object} state - Optimizer state (currently not consulted).
 * @returns {string} One summary line per variant, or a placeholder sentence
 *   when there is nothing to summarize.
 */
function getRecentFeedback(state) {
  const scoresPath = path.join(RESULTS_DIR, 'scores.jsonl');
  if (!fs.existsSync(scoresPath)) return 'No previous evaluations.';

  const lines = nonEmptyLines(fs.readFileSync(scoresPath, 'utf8')).slice(-20);
  // An existing but empty file used to crash in JSON.parse('').
  if (lines.length === 0) return 'No previous evaluations.';

  const scores = lines.map((l) => JSON.parse(l));

  const summary = scores.reduce((acc, s) => {
    if (!acc[s.variant]) acc[s.variant] = { total: 0, count: 0, errors: 0 };
    acc[s.variant].total += s.metrics?.successfulToolCalls || 0;
    acc[s.variant].count++;
    acc[s.variant].errors += s.metrics?.errorCount || 0;
    return acc;
  }, {});

  return Object.entries(summary)
    .map(
      ([v, s]) =>
        `${v}: ${s.count} sessions, ${s.errors} errors, avg success: ${(s.total / s.count).toFixed(1)}`
    )
    .join('\n');
}

/**
 * Call Claude API for mutation generation
 *
 * Tries the `claude` CLI first and falls back to the HTTP API when the CLI
 * is unavailable or exits non-zero. Exits the process with code 1 when the
 * fallback is needed but ANTHROPIC_API_KEY is not set.
 *
 * @param {string} prompt - Prompt text.
 * @returns {Promise<string>} The model's text output.
 * @throws {Error} When the HTTP API returns an error or no text content.
 */
async function callClaude(prompt) {
  // Try using claude CLI first. The prompt is fed on stdin without a shell:
  // it contains backticks and `$`, which a shell would expand even inside
  // double quotes.
  try {
    return execFileSync('claude', ['--print'], {
      input: prompt,
      encoding: 'utf8',
      maxBuffer: MAX_OUTPUT_BYTES,
    });
  } catch {
    // Deliberate best-effort: any CLI failure falls through to the API.
  }

  // Fallback to API
  const apiKey = process.env.ANTHROPIC_API_KEY;
  if (!apiKey) {
    console.error('Error: ANTHROPIC_API_KEY not set and claude CLI failed');
    process.exit(1);
  }

  const response = await fetch('https://api.anthropic.com/v1/messages', {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'x-api-key': apiKey,
      'anthropic-version': '2023-06-01',
    },
    body: JSON.stringify({
      model: 'claude-sonnet-4-20250514',
      max_tokens: 8000,
      messages: [{ role: 'user', content: prompt }],
    }),
  });

  if (!response.ok) {
    const detail = await response.text();
    throw new Error(
      `Anthropic API request failed (${response.status}): ${detail.slice(0, 500)}`
    );
  }

  const data = await response.json();
  const text = data?.content?.[0]?.text;
  if (typeof text !== 'string') {
    throw new Error('Anthropic API response contained no text content');
  }
  return text;
}

/**
 * Run evaluations on a variant
 *
 * Evaluates a variant of the NEXT generation (currentGeneration + 1) against
 * the tasks found in the `*.jsonl` files of EVALS_DIR, capped at
 * `config.evals.minSamplesPerVariant` tasks, and writes the results to
 * RESULTS_DIR.
 *
 * @param {string} variantName - Variant name without the `.md` extension.
 * @returns {Promise<{variant: string, score: number, results: object[]}|null>}
 *   null when the variant file does not exist.
 */
async function runEval(variantName) {
  const state = getState();
  const gen = state.currentGeneration + 1; // Eval next gen variants
  const variantPath = getGenPath(gen, variantName);

  if (!fs.existsSync(variantPath)) {
    console.error(`Variant not found: ${variantPath}`);
    return null;
  }

  console.log(`Running evals on ${variantName}...`);

  // Load eval tasks
  const evalFiles = fs
    .readdirSync(EVALS_DIR)
    .filter((f) => f.endsWith('.jsonl'));
  const tasks = evalFiles.flatMap((f) =>
    nonEmptyLines(fs.readFileSync(path.join(EVALS_DIR, f), 'utf8')).map((l) =>
      JSON.parse(l)
    )
  );

  console.log(`  Found ${tasks.length} eval tasks`);

  // Set environment for tracking
  process.env.GEPA_VARIANT = variantName;
  process.env.GEPA_GENERATION = String(gen);

  const results = [];
  for (const task of tasks.slice(0, config.evals.minSamplesPerVariant)) {
    console.log(`  Running: ${task.name}`);
    const result = await runSingleEval(task, variantPath);
    results.push({
      taskId: task.id,
      taskName: task.name,
      ...result,
    });
  }

  // Save results
  const resultsPath = path.join(RESULTS_DIR, `eval-${gen}-${variantName}.json`);
  fs.writeFileSync(
    resultsPath,
    JSON.stringify({ variant: variantName, generation: gen, results }, null, 2)
  );

  // Calculate aggregate score
  const score = calculateScore(results);
  console.log(`  Score: ${(score * 100).toFixed(1)}%`);

  return { variant: variantName, score, results };
}

/**
 * Run a single eval task
 *
 * Creates a throwaway project directory containing the variant as CLAUDE.md
 * (plus the task fixture, if any), runs `claude --print` in it with the task
 * prompt on stdin, and checks the combined stdout/stderr against the task's
 * expectations. The temp directory is removed on every exit path.
 *
 * @param {object} task - Eval task; reads `prompt`, `expected`, `input_file`.
 * @param {string} variantPath - Path of the variant markdown file.
 * @returns {Promise<{passed: boolean, duration: number, output?: string, error?: string}>}
 */
async function runSingleEval(task, variantPath) {
  const startTime = Date.now();
  let tempDir = null;

  try {
    // Create temp project with variant as CLAUDE.md
    tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'gepa-eval-'));
    fs.copyFileSync(variantPath, path.join(tempDir, 'CLAUDE.md'));

    // Copy fixture if needed
    if (task.input_file) {
      const fixturePath = path.join(EVALS_DIR, task.input_file);
      if (fs.existsSync(fixturePath)) {
        fs.copyFileSync(
          fixturePath,
          path.join(tempDir, path.basename(task.input_file))
        );
      }
    }

    // Run claude with the task prompt. No shell is involved, so the prompt
    // cannot be reinterpreted as shell syntax. config.evals.timeout is
    // treated as milliseconds.
    const child = spawnSync('claude', ['--print'], {
      cwd: tempDir,
      input: String(task.prompt ?? ''),
      encoding: 'utf8',
      maxBuffer: MAX_OUTPUT_BYTES,
      timeout: config.evals.timeout,
    });

    // A timeout still yields partial output worth evaluating; any other
    // spawn failure (e.g. the CLI is not installed) is a failed eval.
    if (child.error && child.error.code !== 'ETIMEDOUT') throw child.error;

    const result = `${child.stdout ?? ''}${child.stderr ?? ''}`;

    // Evaluate result against expected outcomes
    const passed = evaluateExpectations(result, task.expected);

    return {
      passed,
      duration: Date.now() - startTime,
      output: result.slice(0, 2000),
    };
  } catch (error) {
    return {
      passed: false,
      duration: Date.now() - startTime,
      error: error.message,
    };
  } finally {
    // Cleanup
    if (tempDir) fs.rmSync(tempDir, { recursive: true, force: true });
  }
}

/**
 * Evaluate output against expectations
 *
 * Each key of `expected` selects one heuristic check; only the keys are
 * consulted, the associated values are ignored. Unknown keys pass when the
 * key text occurs (case-insensitively) in the output. The output passes when
 * at least 60% of the checks succeed.
 *
 * @param {string} output - Captured model output.
 * @param {object} [expected] - Expectation map from the eval task.
 * @returns {boolean} true when there are no expectations or enough pass.
 */
function evaluateExpectations(output, expected) {
  if (!expected) return true;

  const checks = Object.keys(expected).map((key) => {
    // Simple heuristic checks
    switch (key) {
      case 'has_function':
        return /function\s+\w+|const\s+\w+\s*=\s*(\([^)]*\)|async)?\s*(=>|\{)/.test(
          output
        );
      case 'handles_edge_cases':
        return /if\s*\(|edge|empty|null|undefined|\.length/.test(output);
      case 'uses_async':
        return /async|await|Promise/.test(output);
      case 'no_nested_callbacks':
        return !/callback\s*\(\s*function|\.then\s*\([^)]*\.then/.test(output);
      case 'bug_fixed':
        return /fix|correct|change|update/i.test(output);
      case 'explains_fix':
        return (
          output.length > 200 &&
          /because|since|the issue|the problem/i.test(output)
        );
      default:
        return output.toLowerCase().includes(key.toLowerCase());
    }
  });

  // An empty expectation object means "nothing to check"; without this the
  // ratio below is 0/0 = NaN and the task would be reported as failed.
  if (checks.length === 0) return true;

  return checks.filter(Boolean).length / checks.length >= 0.6;
}

/**
 * Calculate weighted score
 *
 * @param {Array<{passed: boolean}>} results - Per-task results.
 * @returns {number} Fraction of passed tasks in [0, 1]; 0 for no results.
 */
function calculateScore(results) {
  if (results.length === 0) return 0; // avoid 0/0 = NaN
  const passed = results.filter((r) => r.passed).length;
  return passed / results.length;
}

/**
 * Score all variants and select best
 *
 * Evaluates every `.md` variant of the next generation. When the top score
 * beats the recorded best, the state advances to that generation and the
 * `generations/current` symlink is repointed at the winning file.
 *
 * @returns {Promise<{variant: string, score: number, results: object[]}|undefined>}
 *   The top-scoring variant, or undefined when nothing could be scored.
 */
async function scoreAndSelect() {
  const state = getState();
  const gen = state.currentGeneration + 1;
  const genDir = getGenPath(gen);

  const variants = fs
    .readdirSync(genDir)
    .filter((f) => f.endsWith('.md'))
    .map((f) => path.basename(f, '.md'));

  // getGenPath() creates the directory, so "not found" is detected by the
  // absence of variant files rather than by the directory's existence.
  if (variants.length === 0) {
    console.error(`Generation ${gen} not found. Run 'mutate' first.`);
    return;
  }

  console.log(`Scoring ${variants.length} variants in generation ${gen}...`);

  const scores = [];
  for (const variant of variants) {
    const result = await runEval(variant);
    if (result) scores.push(result);
  }

  if (scores.length === 0) {
    console.error(`No variants could be evaluated in generation ${gen}.`);
    return;
  }

  // Sort by score
  scores.sort((a, b) => b.score - a.score);

  console.log('\nResults:');
  scores.forEach((s, i) => {
    const marker = i === 0 ? ' <-- BEST' : '';
    console.log(
      `  ${i + 1}. ${s.variant}: ${(s.score * 100).toFixed(1)}%${marker}`
    );
  });

  // Select best
  const best = scores[0];
  if (best.score > state.bestScore) {
    state.currentGeneration = gen;
    state.bestVariant = best.variant;
    state.bestScore = best.score;

    // Update symlink. rmSync with force also removes a dangling link, which
    // existsSync() cannot see and which made symlinkSync fail with EEXIST.
    const currentLink = path.join(GENERATIONS_DIR, 'current');
    fs.rmSync(currentLink, { force: true });
    fs.symlinkSync(getGenPath(gen, best.variant), currentLink);

    console.log(
      `\nNew best: ${best.variant} (${(best.score * 100).toFixed(1)}%)`
    );
    console.log(
      `Symlink updated: generations/current -> gen-${String(gen).padStart(3, '0')}/${best.variant}.md`
    );
  } else {
    console.log(
      `\nNo improvement over previous best (${(state.bestScore * 100).toFixed(1)}%)`
    );
  }

  state.history.push({
    generation: gen,
    action: 'select',
    scores: scores.map((s) => ({ variant: s.variant, score: s.score })),
    best: best.variant,
    timestamp: new Date().toISOString(),
  });
  saveState(state);

  return best;
}

/**
 * Full optimization loop
 *
 * Alternates mutate() and scoreAndSelect() for up to `generations` rounds,
 * stopping early once a round's best score reaches
 * `config.scoring.threshold` or when a round yields nothing to score.
 *
 * @param {number} [generations] - Maximum number of rounds.
 */
async function run(generations = config.evolution.generations) {
  console.log(`Starting GEPA optimization for ${generations} generations...\n`);

  for (let i = 0; i < generations; i++) {
    console.log(`\n${'='.repeat(60)}`);
    console.log(`GENERATION ${i + 1}/${generations}`);
    console.log(`${'='.repeat(60)}\n`);

    await mutate();
    const best = await scoreAndSelect();

    if (!best) {
      // scoreAndSelect() already reported why nothing was scored.
      console.error('No variants could be scored; stopping.');
      break;
    }

    if (best.score >= config.scoring.threshold) {
      console.log(
        `\nThreshold reached (${(config.scoring.threshold * 100).toFixed(0)}%)! Stopping early.`
      );
      break;
    }
  }

  console.log('\n' + '='.repeat(60));
  console.log('OPTIMIZATION COMPLETE');
  console.log('='.repeat(60));

  const state = getState();
  console.log(`Best variant: ${state.bestVariant}`);
  console.log(`Best score: ${(state.bestScore * 100).toFixed(1)}%`);
  console.log(`Generations: ${state.currentGeneration}`);
  console.log(`\nTo apply: cp generations/current /path/to/your/CLAUDE.md`);
}

/**
 * Show status
 *
 * Prints the current generation, best variant/score, target path and the
 * last 10 history entries.
 */
function status() {
  const state = getState();

  console.log('GEPA Status');
  console.log('===========');
  console.log(`Current generation: ${state.currentGeneration}`);
  console.log(`Best variant: ${state.bestVariant}`);
  console.log(`Best score: ${(state.bestScore * 100).toFixed(1)}%`);
  console.log(`Target: ${state.targetPath}`);
  console.log(`\nHistory:`);
  state.history.slice(-10).forEach((h) => {
    console.log(`  [${h.timestamp}] ${h.action} (gen ${h.generation})`);
  });
}

/**
 * Diff two variants
 *
 * Runs `diff -u` on two variants of the current generation and streams the
 * output to this process's stdio.
 *
 * @param {string} [a] - First variant; defaults to `baseline`.
 * @param {string} [b] - Second variant; defaults to the recorded best.
 */
function diff(a, b) {
  const state = getState();
  const gen = state.currentGeneration;

  const pathA = getGenPath(gen, a || 'baseline');
  const pathB = getGenPath(gen, b || state.bestVariant);

  if (!fs.existsSync(pathA) || !fs.existsSync(pathB)) {
    console.error('Variant not found');
    return;
  }

  // Arguments are passed as an array so paths with spaces or shell
  // metacharacters (variant names come from the command line) are safe.
  const result = spawnSync('diff', ['-u', pathA, pathB], { stdio: 'inherit' });

  if (result.error) {
    console.error(`Failed to run diff: ${result.error.message}`);
  } else if (result.status > 1) {
    // diff exits 0 (identical) or 1 (files differ); anything higher is a
    // real failure.
    console.error(`diff exited with status ${result.status}`);
  }
}

/**
 * CLI entry point: dispatch a sub-command and await its completion so that
 * failures surface as a message and a non-zero exit code instead of an
 * unhandled promise rejection.
 *
 * @param {string[]} argv - Command-line arguments after the script name.
 */
async function main(argv) {
  const [command, arg1, arg2] = argv;

  switch (command) {
    case 'init':
      await init(arg1);
      break;
    case 'mutate':
      await mutate();
      break;
    case 'eval':
      await runEval(arg1 || 'baseline');
      break;
    case 'select':
    case 'score':
      await scoreAndSelect();
      break;
    case 'run':
      // A missing or non-numeric count falls back to the configured default.
      await run(Number.parseInt(arg1, 10) || config.evolution.generations);
      break;
    case 'status':
      status();
      break;
    case 'diff':
      diff(arg1, arg2);
      break;
    default:
      console.log(`
GEPA - Genetic Eval-driven Prompt Algorithm

Usage:
  node optimize.js init [path]       Initialize with CLAUDE.md
  node optimize.js mutate            Generate new variants
  node optimize.js eval [variant]    Run evals on variant
  node optimize.js score             Score all variants, select best
  node optimize.js run [generations] Full optimization loop
  node optimize.js status            Show current status
  node optimize.js diff [a] [b]      Compare two variants
`);
  }
}

// CLI
main(process.argv.slice(2)).catch((error) => {
  console.error(`Error: ${error.message}`);
  process.exitCode = 1;
});