// @ace-sdk/cli
// Version: (unspecified)
// ACE CLI - Command-line tool for intelligent pattern learning and playbook management
// 874 lines • 38.3 kB
// JavaScript
/**
* InitializationService - Offline learning from existing codebase
*
* Implements ACE Paper Section 4.1: Offline Adaptation
* Analyzes git history and existing code to build initial playbook
*/
import { exec } from 'child_process';
import { promisify } from 'util';
import { LanguageDetector } from '@ace-sdk/core';
const execAsync = promisify(exec);
export class InitializationService {
languageDetector;
constructor() {
this.languageDetector = new LanguageDetector();
}
/**
* Initialize playbook from existing codebase
*
* SUPPORTS MULTIPLE MODES:
* - hybrid: Docs + Git + Local files (NEW DEFAULT - intelligent fallback)
* - both: Git + Local files (legacy behavior)
* - docs-only: Only documentation files
* - git-history: Only git commits
* - local-files: Only source files
*
* USES LINGUIST: Automatically detects ALL programming languages (no hardcoded extensions!)
*/
async initializeFromCodebase(repoPath, options = {}) {
const { mode = 'hybrid', commitLimit = 500, // NEW DEFAULT: 500 (was 100)
daysBack = 90, // NEW DEFAULT: 90 (was 30)
maxFiles = 5000, // NEW DEFAULT: 5000 (was implicit 1000)
// filePatterns used by legacy analyzeSourceFiles fallback (hardcoded in that method)
filePatterns: _filePatterns = ['*.ts', '*.js', '*.py', '*.java', '*.go', '*.tsx', '*.jsx'], // eslint-disable-line @typescript-eslint/no-unused-vars
skipPatterns = ['merge', 'wip', 'temp', 'revert'] } = options;
console.error('📚 Analyzing codebase for offline initialization...');
console.error(` Repo: ${repoPath}`);
console.error(` Mode: ${mode}`);
const allPatterns = [];
// Determine what to analyze based on mode
const shouldAnalyzeDocs = mode === 'hybrid' || mode === 'docs-only';
const shouldAnalyzeGit = mode === 'hybrid' || mode === 'both' || mode === 'git-history';
const shouldAnalyzeFiles = mode === 'hybrid' || mode === 'both' || mode === 'local-files';
// 1. Analyze documentation files (NEW - hybrid and docs-only modes)
if (shouldAnalyzeDocs) {
console.error(' Scanning documentation files...');
const docsPatterns = await this.analyzeDocumentation(repoPath);
console.error(` Extracted ${docsPatterns.length} patterns from documentation`);
allPatterns.push(...docsPatterns);
}
// 2. Analyze git history (if available and requested)
if (shouldAnalyzeGit) {
const hasGit = await this.hasGitRepo(repoPath);
if (hasGit) {
console.error(` Git repo detected - analyzing commits (${commitLimit} max, ${daysBack} days)`);
const commits = await this.analyzeGitHistory(repoPath, commitLimit, daysBack, skipPatterns);
console.error(` Found ${commits.length} relevant commits`);
const gitPatterns = await this.extractPatternsFromCommits(commits, repoPath);
console.error(` Extracted ${gitPatterns.length} patterns from git history`);
allPatterns.push(...gitPatterns);
}
else {
console.error(' No git repo - skipping commit analysis');
}
}
// 3. Analyze local source files (if requested)
if (shouldAnalyzeFiles) {
console.error(` Analyzing local source files (max: ${maxFiles === -1 ? 'unlimited' : maxFiles})...`);
console.error(` Using GitHub Linguist for automatic language detection...`);
const sourcePatterns = await this.analyzeSourceFilesWithLinguist(repoPath, maxFiles);
console.error(` Extracted ${sourcePatterns.length} patterns from source files`);
allPatterns.push(...sourcePatterns);
}
// 4. Build structured playbook from combined sources
console.error(` Total patterns discovered: ${allPatterns.length}`);
const playbook = this.buildInitialPlaybook(allPatterns);
console.error('✅ Offline initialization complete');
return playbook;
}
/**
* Check if directory has git repository
*/
async hasGitRepo(repoPath) {
try {
await execAsync(`git -C "${repoPath}" rev-parse --git-dir`, {
timeout: 5000
});
return true;
}
catch {
return false;
}
}
/**
* Analyze git history to extract meaningful commits
*/
async analyzeGitHistory(repoPath, limit, daysBack, skipPatterns) {
const sinceDate = new Date();
sinceDate.setDate(sinceDate.getDate() - daysBack);
const since = sinceDate.toISOString().split('T')[0];
try {
// Get commit log with stats
const { stdout } = await execAsync(`git -C "${repoPath}" log --since="${since}" --pretty=format:"%H|%s|%an|%ai" --numstat -n ${limit}`, { maxBuffer: 10 * 1024 * 1024 } // 10MB buffer
);
const lines = stdout.split('\n');
const commits = [];
let currentCommit = null;
for (const line of lines) {
if (!line.trim()) {
if (currentCommit && currentCommit.hash) {
commits.push(currentCommit);
}
currentCommit = null;
continue;
}
// Commit header: hash|message|author|date
if (line.includes('|')) {
const [hash, message, author, date] = line.split('|');
// Skip commits with skip patterns
if (skipPatterns.some(p => message.toLowerCase().includes(p))) {
currentCommit = null;
continue;
}
currentCommit = {
hash,
message,
author,
date,
files: [],
additions: 0,
deletions: 0
};
}
// File stat: additions deletions filename
else if (currentCommit) {
const [additions, deletions, filename] = line.split('\t');
if (filename) {
currentCommit.files.push(filename);
currentCommit.additions += parseInt(additions) || 0;
currentCommit.deletions += parseInt(deletions) || 0;
}
}
}
return commits;
}
catch (error) {
console.error('⚠️ Git analysis failed (not a git repo?), using empty history');
return [];
}
}
/**
* Analyze documentation files for best practices, troubleshooting, and architectural guidance
*
* Scans: CLAUDE.md, README.md, ARCHITECTURE.md, docs/ directory, and root markdown files
*/
async analyzeDocumentation(repoPath) {
const fs = await import('fs/promises');
const path = await import('path');
const patterns = [];
const docFiles = [
'CLAUDE.md',
'README.md',
'ARCHITECTURE.md',
'CONTRIBUTING.md',
'DEVELOPMENT.md',
'TROUBLESHOOTING.md'
];
// Scan root-level docs
for (const docFile of docFiles) {
const docPath = path.join(repoPath, docFile);
try {
const content = await fs.readFile(docPath, 'utf-8');
const extracted = await this.extractPatternsFromMarkdown(content, docFile);
patterns.push(...extracted);
}
catch {
// File doesn't exist, skip
}
}
// Scan docs/ directory
const docsDir = path.join(repoPath, 'docs');
try {
const docsDirFiles = await this.findMarkdownFiles(docsDir);
for (const file of docsDirFiles.slice(0, 20)) { // Max 20 docs files
try {
const content = await fs.readFile(file, 'utf-8');
const relativePath = path.relative(repoPath, file);
const extracted = await this.extractPatternsFromMarkdown(content, relativePath);
patterns.push(...extracted);
}
catch {
// Skip files that can't be read
}
}
}
catch {
// docs/ directory doesn't exist
}
return patterns;
}
/**
* Find markdown files in a directory
*/
async findMarkdownFiles(dir) {
const fs = await import('fs/promises');
const path = await import('path');
const files = [];
async function scan(currentDir, depth = 0) {
if (depth > 3)
return; // Max depth 3
try {
const entries = await fs.readdir(currentDir, { withFileTypes: true });
for (const entry of entries) {
if (entry.name.startsWith('.'))
continue;
const fullPath = path.join(currentDir, entry.name);
if (entry.isDirectory()) {
await scan(fullPath, depth + 1);
}
else if (entry.isFile() && entry.name.endsWith('.md')) {
files.push(fullPath);
}
}
}
catch {
// Skip directories we can't read
}
}
await scan(dir);
return files;
}
/**
* Extract patterns from markdown documentation
*/
async extractPatternsFromMarkdown(content, filename) {
const patterns = [];
// First, extract complete code blocks from markdown
const { extractCodeBlocksFromMarkdown } = await import('../utils/code-extractor.js');
const codeBlocks = extractCodeBlocksFromMarkdown(content);
for (const codeBlock of codeBlocks) {
patterns.push({
section: 'useful_code_snippets',
content: codeBlock, // Complete code block, not snippet
confidence: 0.9,
evidence: [filename]
});
}
// Then extract headers and their content for other patterns
const lines = content.split('\n');
let currentSection = '';
let currentContent = [];
for (let i = 0; i < lines.length; i++) {
const line = lines[i];
// Detect headers
if (line.startsWith('#')) {
// Process previous section
if (currentSection && currentContent.length > 0) {
const sectionText = currentContent.join('\n').trim(); // Join with newlines, not spaces
const pattern = this.classifyDocSection(currentSection, sectionText, filename);
if (pattern)
patterns.push(pattern);
}
currentSection = line.replace(/^#+\s*/, '').trim();
currentContent = [];
}
else if (line.trim()) {
currentContent.push(line); // Keep original line, not trimmed
}
}
// Process last section
if (currentSection && currentContent.length > 0) {
const sectionText = currentContent.join('\n').trim();
const pattern = this.classifyDocSection(currentSection, sectionText, filename);
if (pattern)
patterns.push(pattern);
}
return patterns;
}
/**
* Classify documentation section into playbook category
*/
classifyDocSection(header, content, filename) {
const headerLower = header.toLowerCase();
// Increase from 200 to 2000 chars to capture more context
const contentSnippet = content.substring(0, 2000);
// Skip very short sections
if (content.length < 50)
return null;
// STRATEGIES: Best practices, coding standards, architecture
if (/best practice|coding standard|architecture|pattern|principle|rule|guideline/i.test(headerLower)) {
return {
section: 'strategies_and_hard_rules',
content: `${header}: ${contentSnippet}`,
confidence: 0.85,
evidence: [filename]
};
}
// TROUBLESHOOTING: Known issues, gotchas, common problems
if (/troubleshoot|known issue|common problem|gotcha|pitfall|warning|caveat/i.test(headerLower)) {
return {
section: 'troubleshooting_and_pitfalls',
content: `${header}: ${contentSnippet}`,
confidence: 0.9,
evidence: [filename]
};
}
// APIS: Integration guides, library usage, API documentation
if (/api|integration|library|framework|dependency|tool|service/i.test(headerLower)) {
return {
section: 'apis_to_use',
content: `${header}: ${contentSnippet}`,
confidence: 0.8,
evidence: [filename]
};
}
// CODE SNIPPETS: Example code, how-to sections
if (/example|snippet|code|usage|how to|quick start/i.test(headerLower) &&
content.includes('```')) {
return {
section: 'useful_code_snippets',
content: `${header}: ${contentSnippet}`,
confidence: 0.75,
evidence: [filename]
};
}
return null;
}
/**
* Analyze local source files using GitHub Linguist (NEW - replaces hardcoded extensions!)
*
* Automatically detects ALL programming languages using Linguist
* Supports: TypeScript, JavaScript, Python, Java, Go, Rust, Ruby, PHP, C#, Kotlin, Swift, and 100+ more!
*/
async analyzeSourceFilesWithLinguist(repoPath, maxFiles = 5000) {
const fs = await import('fs/promises');
const path = await import('path');
const discoveredPatterns = [];
try {
// Use Linguist to detect all programming files
console.error(' 🔍 Detecting languages with GitHub Linguist...');
const programmingFiles = await this.languageDetector.getProgrammingFiles(repoPath, maxFiles);
const languageBreakdown = await this.languageDetector.getLanguageBreakdown(repoPath);
console.error(` 📊 Detected ${Object.keys(languageBreakdown).length} languages: ${Object.keys(languageBreakdown).join(', ')}`);
console.error(` 📁 Found ${programmingFiles.length} source files`);
// Add language breakdown as a pattern
for (const [lang, percentage] of Object.entries(languageBreakdown)) {
if (percentage > 5) { // Only include if >5% of codebase
discoveredPatterns.push({
section: 'strategies_and_hard_rules',
content: `Primary language: ${lang} (${percentage.toFixed(1)}% of codebase)`,
confidence: 0.95,
evidence: ['linguist-analysis']
});
}
}
// Analyze dependency files (package.json, requirements.txt, etc.)
const depPatterns = await this.analyzeDependencyFiles(repoPath);
discoveredPatterns.push(...depPatterns);
// Sample source files for deeper analysis
const sampleFiles = programmingFiles.slice(0, Math.min(50, programmingFiles.length));
for (const filePath of sampleFiles) {
try {
const content = await fs.readFile(filePath, 'utf-8');
const relativePath = path.relative(repoPath, filePath);
// Extract complete function bodies instead of summaries
const { extractFunctionBodies, isInteresting } = await import('../utils/code-extractor.js');
const functions = extractFunctionBodies(content, relativePath);
for (const func of functions) {
// Only include interesting code (10+ lines with async/error handling/API calls)
if (func.lines.length >= 10 && isInteresting(func.code)) {
discoveredPatterns.push({
section: 'useful_code_snippets',
content: func.code, // Complete code with imports
confidence: 0.9,
evidence: [relativePath]
});
}
}
// Also extract smaller interesting blocks (5-9 lines) for quick patterns
for (const func of functions) {
if (func.lines.length >= 5 && func.lines.length < 10 && isInteresting(func.code)) {
discoveredPatterns.push({
section: 'useful_code_snippets',
content: func.code,
confidence: 0.75,
evidence: [relativePath]
});
}
}
}
catch {
// Skip files that can't be read
}
}
}
catch (error) {
console.error('⚠️ Linguist analysis failed, falling back to basic detection:', error);
// Fallback to old method if Linguist fails
return this.analyzeSourceFiles(repoPath, ['*.ts', '*.js', '*.py'], maxFiles);
}
return discoveredPatterns;
}
/**
* Analyze dependency files (package.json, requirements.txt, Cargo.toml, etc.)
*/
async analyzeDependencyFiles(repoPath) {
const fs = await import('fs/promises');
const path = await import('path');
const patterns = [];
// TypeScript/JavaScript: package.json
try {
const packageJsonPath = path.join(repoPath, 'package.json');
const packageJson = JSON.parse(await fs.readFile(packageJsonPath, 'utf-8'));
const allDeps = {
...packageJson.dependencies,
...packageJson.devDependencies
};
const topDeps = Object.keys(allDeps).slice(0, 10);
for (const dep of topDeps) {
patterns.push({
section: 'apis_to_use',
content: `Project uses ${dep} (${allDeps[dep]})`,
confidence: 0.9,
evidence: ['package.json']
});
}
// Framework detection
if (allDeps['react']) {
patterns.push({
section: 'strategies_and_hard_rules',
content: 'React framework - use functional components with hooks',
confidence: 0.85,
evidence: ['package.json']
});
}
if (allDeps['express'] || allDeps['fastify']) {
patterns.push({
section: 'strategies_and_hard_rules',
content: 'Node.js backend - use async/await for all routes',
confidence: 0.85,
evidence: ['package.json']
});
}
}
catch {
// No package.json
}
// Python: requirements.txt
try {
const requirementsPath = path.join(repoPath, 'requirements.txt');
const requirements = await fs.readFile(requirementsPath, 'utf-8');
const deps = requirements.split('\n').filter(line => line.trim() && !line.startsWith('#'));
for (const dep of deps.slice(0, 10)) {
const pkgName = dep.split('==')[0].split('>=')[0].trim();
patterns.push({
section: 'apis_to_use',
content: `Python project uses ${pkgName}`,
confidence: 0.9,
evidence: ['requirements.txt']
});
}
}
catch {
// No requirements.txt
}
// Rust: Cargo.toml
try {
const cargoPath = path.join(repoPath, 'Cargo.toml');
const cargo = await fs.readFile(cargoPath, 'utf-8');
if (cargo) {
patterns.push({
section: 'strategies_and_hard_rules',
content: 'Rust project - follows ownership and borrowing principles',
confidence: 0.9,
evidence: ['Cargo.toml']
});
}
}
catch {
// No Cargo.toml
}
// Go: go.mod
try {
const goModPath = path.join(repoPath, 'go.mod');
const goMod = await fs.readFile(goModPath, 'utf-8');
if (goMod) {
patterns.push({
section: 'strategies_and_hard_rules',
content: 'Go project - use goroutines for concurrency, defer for cleanup',
confidence: 0.9,
evidence: ['go.mod']
});
}
}
catch {
// No go.mod
}
return patterns;
}
/**
* OLD METHOD: Analyze local source files for patterns (FALLBACK ONLY)
*
* @deprecated Use analyzeSourceFilesWithLinguist instead
*/
async analyzeSourceFiles(repoPath, patterns, maxFiles = 5000) {
const fs = await import('fs/promises');
const path = await import('path');
const discoveredPatterns = [];
try {
// Read package.json for dependencies (TypeScript/JavaScript)
const packageJsonPath = path.join(repoPath, 'package.json');
try {
const packageJson = JSON.parse(await fs.readFile(packageJsonPath, 'utf-8'));
const allDeps = {
...packageJson.dependencies,
...packageJson.devDependencies
};
// Top dependencies used in project
const topDeps = Object.keys(allDeps).slice(0, 10);
for (const dep of topDeps) {
discoveredPatterns.push({
section: 'apis_to_use',
content: `Project uses ${dep} (${allDeps[dep]})`,
confidence: 0.9,
evidence: ['package.json']
});
}
// Framework detection
if (allDeps['react']) {
discoveredPatterns.push({
section: 'strategies_and_hard_rules',
content: 'React framework - use functional components with hooks',
confidence: 0.85,
evidence: ['package.json']
});
}
if (allDeps['express'] || allDeps['fastify']) {
discoveredPatterns.push({
section: 'strategies_and_hard_rules',
content: 'Node.js backend - use async/await for all routes',
confidence: 0.85,
evidence: ['package.json']
});
}
}
catch {
// No package.json or parse error
}
// Read requirements.txt for dependencies (Python)
const requirementsPath = path.join(repoPath, 'requirements.txt');
try {
const requirements = await fs.readFile(requirementsPath, 'utf-8');
const deps = requirements.split('\n').filter(line => line.trim() && !line.startsWith('#'));
for (const dep of deps.slice(0, 10)) {
const pkgName = dep.split('==')[0].split('>=')[0].trim();
discoveredPatterns.push({
section: 'apis_to_use',
content: `Python project uses ${pkgName}`,
confidence: 0.9,
evidence: ['requirements.txt']
});
}
}
catch {
// No requirements.txt
}
// Scan for common patterns in source files
const sourceFiles = await this.findSourceFiles(repoPath, patterns, maxFiles);
const sampleFiles = sourceFiles.slice(0, Math.min(50, sourceFiles.length)); // Sample up to 50 files
for (const filePath of sampleFiles) {
try {
const content = await fs.readFile(filePath, 'utf-8');
const relativePath = path.relative(repoPath, filePath);
// Detect import patterns
const imports = this.extractImports(content, filePath);
for (const imp of imports.slice(0, 5)) {
discoveredPatterns.push({
section: 'useful_code_snippets',
content: `Common import: ${imp}`,
confidence: 0.7,
evidence: [relativePath]
});
}
// Detect API/endpoint patterns
if (content.includes('app.get(') || content.includes('app.post(')) {
discoveredPatterns.push({
section: 'apis_to_use',
content: `REST API endpoints defined in ${relativePath}`,
confidence: 0.8,
evidence: [relativePath]
});
}
// Detect database patterns
if (content.includes('prisma') || content.includes('mongoose') || content.includes('typeorm')) {
discoveredPatterns.push({
section: 'strategies_and_hard_rules',
content: 'Uses ORM for database access - define models before queries',
confidence: 0.75,
evidence: [relativePath]
});
}
// Detect async patterns
if (content.includes('async ') && content.includes('await ')) {
discoveredPatterns.push({
section: 'strategies_and_hard_rules',
content: 'Codebase uses async/await - ensure all async functions are awaited',
confidence: 0.8,
evidence: [relativePath]
});
}
}
catch {
// Skip files that can't be read
}
}
}
catch (error) {
console.error('⚠️ Source file analysis failed:', error);
}
return discoveredPatterns;
}
/**
* Find source files matching patterns
*/
async findSourceFiles(repoPath, _patterns, // Used for filter criteria, currently hardcoded in scanDir
maxFiles = 5000) {
const fs = await import('fs/promises');
const path = await import('path');
const files = [];
async function scanDir(dir, depth = 0) {
// Stop if we've hit the limit (unless unlimited)
if (maxFiles !== -1 && files.length >= maxFiles)
return true;
if (depth > 5)
return false; // Max depth
try {
const entries = await fs.readdir(dir, { withFileTypes: true });
for (const entry of entries) {
// Stop if we've hit the limit
if (maxFiles !== -1 && files.length >= maxFiles)
return true;
// Skip node_modules, .git, dist, build
if (['node_modules', '.git', 'dist', 'build', '.next', 'target', '__pycache__', 'venv'].includes(entry.name)) {
continue;
}
const fullPath = path.join(dir, entry.name);
if (entry.isDirectory()) {
const shouldStop = await scanDir(fullPath, depth + 1);
if (shouldStop)
return true;
}
else if (entry.isFile()) {
// Check if matches patterns
const ext = path.extname(entry.name);
if (['.ts', '.js', '.tsx', '.jsx', '.py', '.java', '.go', '.rs', '.rb', '.php'].includes(ext)) {
files.push(fullPath);
}
}
}
}
catch {
// Skip directories we can't read
}
return false;
}
await scanDir(repoPath);
return files;
}
/**
* Extract import statements from source code
*/
extractImports(content, _filePath) {
const imports = [];
// TypeScript/JavaScript imports
const jsImportRegex = /import\s+.*?\s+from\s+['"]([^'"]+)['"]/g;
let match;
while ((match = jsImportRegex.exec(content)) !== null) {
imports.push(match[1]);
}
// Python imports
const pyImportRegex = /^(?:from\s+(\S+)\s+)?import\s+(.+)$/gm;
while ((match = pyImportRegex.exec(content)) !== null) {
imports.push(match[1] || match[2].split(',')[0].trim());
}
return imports;
}
/**
* Extract patterns from commit analysis
*/
async extractPatternsFromCommits(commits, repoPath) {
const patterns = [];
const { extractAddedLinesFromDiff, isInteresting } = await import('../utils/code-extractor.js');
// 1. STRATEGIES from successful refactorings
const refactoringCommits = commits.filter(c => /refactor|improve|optimize|clean/i.test(c.message));
for (const commit of refactoringCommits.slice(0, 10)) {
try {
// Get actual diff for this commit
const { stdout: diff } = await execAsync(`cd "${repoPath}" && git show ${commit.hash}`, { maxBuffer: 1024 * 1024 * 10 });
const addedCode = extractAddedLinesFromDiff(diff);
// Only include if substantial code was added (10+ lines) and it's interesting
if (addedCode.split('\n').length >= 10 && isInteresting(addedCode)) {
patterns.push({
section: 'strategies_and_hard_rules',
content: `Refactoring pattern (${commit.message}):\n\n${addedCode.substring(0, 1500)}`,
confidence: 0.8,
evidence: [commit.hash, ...commit.files.slice(0, 3)]
});
}
}
catch {
// Fallback to commit message if git show fails
patterns.push({
section: 'strategies_and_hard_rules',
content: `Pattern from refactoring: ${commit.message}`,
confidence: 0.6,
evidence: [commit.hash]
});
}
}
// 2. TROUBLESHOOTING from bug fixes
const bugFixCommits = commits.filter(c => /fix|bug|error|crash|issue/i.test(c.message));
for (const commit of bugFixCommits.slice(0, 15)) {
try {
// Get actual fix code
const { stdout: diff } = await execAsync(`cd "${repoPath}" && git show ${commit.hash}`, { maxBuffer: 1024 * 1024 * 10 });
const fixedCode = extractAddedLinesFromDiff(diff);
if (fixedCode.split('\n').length >= 5 && isInteresting(fixedCode)) {
patterns.push({
section: 'troubleshooting_and_pitfalls',
content: `Bug fix (${commit.message}):\n\n${fixedCode.substring(0, 1500)}`,
confidence: 0.85,
evidence: [commit.hash, ...commit.files.slice(0, 3)]
});
}
}
catch {
// Fallback to commit message
patterns.push({
section: 'troubleshooting_and_pitfalls',
content: `Common issue: ${commit.message}`,
confidence: 0.7,
evidence: [commit.hash]
});
}
}
// 3. APIS from feature additions
const featureCommits = commits.filter(c => /add|implement|create|new/i.test(c.message) &&
!/(test|doc|comment)/i.test(c.message));
for (const commit of featureCommits.slice(0, 10)) {
if (commit.files.some(f => /api|service|client|interface/i.test(f))) {
try {
const { stdout: diff } = await execAsync(`cd "${repoPath}" && git show ${commit.hash}`, { maxBuffer: 1024 * 1024 * 10 });
const newCode = extractAddedLinesFromDiff(diff);
if (newCode.split('\n').length >= 10 && isInteresting(newCode)) {
patterns.push({
section: 'apis_to_use',
content: `API implementation (${commit.message}):\n\n${newCode.substring(0, 1500)}`,
confidence: 0.75,
evidence: [commit.hash, ...commit.files.slice(0, 3)]
});
}
}
catch {
// Skip if git show fails
}
}
}
// 4. FILE CHANGE PATTERNS - Files that change together
const fileCoOccurrence = this.analyzeFileCoOccurrence(commits);
for (const [fileSet, count] of fileCoOccurrence.slice(0, 5)) {
if (count >= 3) {
patterns.push({
section: 'strategies_and_hard_rules',
content: `Files that often change together: ${fileSet}`,
confidence: Math.min(0.9, count / 10),
evidence: [`Co-occurred ${count} times`]
});
}
}
// 5. COMMON ERROR PATTERNS
const errorPatterns = this.extractErrorPatterns(commits);
patterns.push(...errorPatterns);
return patterns;
}
/**
* Find files that frequently change together
*/
analyzeFileCoOccurrence(commits) {
const coOccurrence = new Map();
for (const commit of commits) {
if (commit.files.length >= 2 && commit.files.length <= 5) {
// Sort files to create consistent key
const fileSet = commit.files.sort().join(' + ');
coOccurrence.set(fileSet, (coOccurrence.get(fileSet) || 0) + 1);
}
}
return Array.from(coOccurrence.entries())
.sort((a, b) => b[1] - a[1]);
}
/**
* Extract error patterns from commit messages
*/
extractErrorPatterns(commits) {
const patterns = [];
const errorKeywords = [
'null pointer',
'undefined',
'not found',
'timeout',
'permission denied',
'connection refused',
'out of memory',
'race condition',
'deadlock'
];
for (const commit of commits) {
const messageLower = commit.message.toLowerCase();
for (const keyword of errorKeywords) {
if (messageLower.includes(keyword)) {
patterns.push({
section: 'troubleshooting_and_pitfalls',
content: `Watch out for ${keyword} errors: ${commit.message}`,
confidence: 0.75,
evidence: [commit.hash, ...commit.files.slice(0, 2)]
});
}
}
}
return patterns;
}
/**
* Build initial playbook from extracted patterns
*/
buildInitialPlaybook(patterns) {
const playbook = {
strategies_and_hard_rules: [],
useful_code_snippets: [],
troubleshooting_and_pitfalls: [],
apis_to_use: []
};
// Group patterns by section and deduplicate
const seenContent = new Set();
for (const pattern of patterns) {
// Skip duplicates
const contentKey = pattern.content.toLowerCase().substring(0, 50);
if (seenContent.has(contentKey))
continue;
seenContent.add(contentKey);
// Create bullet
const bullet = {
id: this.generateBulletId(),
section: pattern.section,
content: pattern.content,
helpful: 0, // Will be updated during online learning
harmful: 0,
confidence: pattern.confidence,
evidence: pattern.evidence,
observations: 0,
created_at: new Date().toISOString(),
last_used: new Date().toISOString()
};
playbook[pattern.section].push(bullet);
}
return playbook;
}
/**
* Generate bullet ID: ctx-{timestamp}-{random}
*/
generateBulletId() {
const timestamp = Math.floor(Date.now() / 1000);
const random = Math.random().toString(36).substring(2, 7);
return `ctx-${timestamp}-${random}`;
}
}
//# sourceMappingURL=initialization.js.map