UNPKG

scai

Version:

> **AI-powered CLI for local code analysis, commit message suggestions, and natural-language queries.** 100% local, private, GDPR-friendly, made in Denmark/EU with ❤️.

248 lines (243 loc) 11.3 kB
import fg from 'fast-glob';
import path from 'path';
import { getDbForRepo } from '../db/client.js';
import { log } from '../utils/log.js';
import { generate } from '../lib/generate.js';
import { cleanupModule } from '../pipeline/modules/cleanupModule.js';
import { IGNORED_FOLDER_GLOBS } from '../fileRules/ignoredPaths.js';

/**
 * Escape the SQL LIKE wildcards (`%`, `_`) and the escape character itself so
 * that a path or filename is matched literally when used with `ESCAPE '\'`.
 *
 * @param {string} value - Raw text to embed in a LIKE pattern.
 * @returns {string} The text with `\`, `%` and `_` prefixed by a backslash.
 */
function escapeLike(value) {
  return value.replace(/[\\%_]/g, '\\$&');
}

/**
 * Normalize a path and convert backslashes to forward slashes.
 *
 * @param {string} p - Path to normalize.
 * @returns {string} Normalized path using `/` separators.
 */
function toForwardSlashes(p) {
  return path.normalize(p).replace(/\\/g, '/');
}

/**
 * Log a per-folder failure, including the stack when an Error is available.
 *
 * @param {string} folderPath - Folder that was being processed.
 * @param {unknown} err - The caught value.
 */
function logFolderFailure(folderPath, err) {
  if (err instanceof Error) {
    log(`🔥 Failed processing folder ${folderPath}: ${err.message}\n${err.stack}`);
  } else {
    log(`🔥 Failed processing folder ${folderPath}:`, err);
  }
}

/**
 * Heuristic score used to rank folders for capsule generation.
 * More files, shallow depth and "code-like" path segments raise the score;
 * asset-like path segments lower it.
 *
 * @param {string} folder - Forward-slash folder path.
 * @param {number} fileCount - Number of files attributed to the folder.
 * @returns {number} The heuristic score (may be negative).
 */
function scoreFolder(folder, fileCount) {
  let score = 0;
  const depth = folder.split('/').length;
  const p = folder.toLowerCase();
  if (fileCount > 5) score += 2;
  if (fileCount > 15) score += 2;
  if (depth <= 6) score += 2;
  if (/src|modules|pipeline|agents|services|commands|lib/.test(p)) score += 3;
  if (/css|sql|html|assets/.test(p)) score -= 3;
  return score;
}

/**
 * Score every file of a folder and attach its indexed content.
 *
 * The score combines: a filename heuristic, the number of files under the
 * folder whose content mentions this file's basename, the number of
 * import/require statements in the file, and a size bonus capped at 3.
 *
 * @param {object} db - Database handle exposing `prepare(sql)`.
 * @param {Array<{path: string, filename: string, type: string}>} files - Files under the folder.
 * @param {string} folderPattern - LIKE pattern (already escaped) matching files under the folder.
 * @returns {Array<object>} The input rows extended with `score` and `code`.
 */
function scoreFiles(db, files, folderPattern) {
  // Prepared once per folder instead of once per file.
  const countIncoming = db.prepare(`
    SELECT COUNT(*) AS c
    FROM files
    WHERE path LIKE ? ESCAPE '\\'
      AND processing_status != 'skipped'
      AND content_text LIKE ? ESCAPE '\\'
  `);
  const selectContent = db.prepare(`SELECT content_text FROM files_fts WHERE path = ?`);

  return files.map((f) => {
    const heuristicName = /index|main|cli|app|server|config/i.test(f.filename) ? 5 : 0;

    const basenamePattern = `%${escapeLike(path.basename(f.path))}%`;
    const incoming = countIncoming.get(folderPattern, basenamePattern);
    const incomingScore = (incoming?.c ?? 0) * 2;

    // The column is selected as `content_text`; reading a camelCased key here
    // would always yield undefined and zero out the content-based scores.
    const code = selectContent.get(f.path)?.content_text ?? '';

    const outgoingCount =
      (code.match(/from\s+['"].+['"]/g)?.length || 0) +
      (code.match(/require\(['"].+['"]\)/g)?.length || 0);
    const sizeScore = Math.min(code.length / 2000, 3);

    const totalScore = heuristicName + incomingScore + outgoingCount + sizeScore;
    log(`📄 File score: ${f.filename} -> ${totalScore}`);
    return { ...f, score: totalScore, code };
  });
}

/**
 * Ask the LLM for a one-sentence folder summary and up to two key files.
 * Failures are logged and result in an empty summary / empty key-file list
 * (best effort — the caller still builds a capsule).
 *
 * @param {string} folderPath - Folder being summarized.
 * @param {Array<{path: string, code: string}>} candidateFiles - Snippets shown to the LLM.
 * @returns {Promise<{folderSummary: string, keyFiles: Array<{path: string, reason: string}>}>}
 */
async function summarizeFolder(folderPath, candidateFiles) {
  const prompt = `
You are analyzing a source code folder.

Folder path:
${folderPath}

Candidate files with code snippets:
${JSON.stringify(candidateFiles, null, 2)}

Task:
- Identify up to TWO files that best represent the purpose of this folder.
- Explain the folder's responsibility in ONE concise sentence.
- Return ONLY valid JSON.

Expected JSON shape:
{
  "summary": "one sentence description",
  "files": [
    { "path": "absolute/file/path", "summary": "optional short note" }
  ]
}
`.trim();

  let folderSummary = '';
  let keyFiles = [];
  try {
    log('🤖 Asking LLM for folder summary...');
    const response = await generate({ content: prompt, query: '' });
    const cleaned = await cleanupModule.run({ query: '', content: response.data });
    const data = typeof cleaned.data === 'string' ? JSON.parse(cleaned.data) : cleaned.data;
    if (data && typeof data === 'object') {
      if (typeof data.summary === 'string') folderSummary = data.summary;
      if (Array.isArray(data.files)) {
        keyFiles = data.files
          .filter((f) => typeof f?.path === 'string')
          .slice(0, 2)
          .map((f) => ({ path: f.path, reason: f.summary || f.reason || 'representative file' }));
      }
    }
    log(`📌 Folder summary: ${folderSummary}`);
  } catch (err) {
    // NOTE(review): a failed LLM call still leads to a persisted capsule with
    // an empty summary — confirm this best-effort behavior is intended.
    logFolderFailure(folderPath, err);
  }
  return { folderSummary, keyFiles };
}

/**
 * Build the capsule for one ranked folder and persist it.
 * Returns without writing anything when the folder has no non-skipped files.
 *
 * @param {object} db - Database handle exposing `prepare(sql)`.
 * @param {{folder: string, score: number}} target - Ranked folder entry.
 * @returns {Promise<void>}
 */
async function buildAndPersistCapsule(db, target) {
  const folderPath = target.folder;
  log(`📦 Building folder capsule: ${folderPath} (score=${target.score})`);

  // Matches every file below the folder (including nested sub-folders);
  // wildcards inside the folder name itself are escaped.
  const folderPattern = `${escapeLike(folderPath)}/%`;

  const files = db.prepare(`
    SELECT path, filename, type
    FROM files
    WHERE path LIKE ? ESCAPE '\\'
      AND processing_status != 'skipped'
  `).all(folderPattern);
  log(`📄 ${files.length} files found in folder`);
  if (!files.length) {
    log('⚠️ Folder empty, skipping');
    return;
  }

  // Pick the two highest scoring files as LLM candidates.
  const fileScores = scoreFiles(db, files, folderPattern);
  const representativeFiles = fileScores.sort((a, b) => b.score - a.score).slice(0, 2);
  log(`📌 Representative files: ${representativeFiles.map((f) => f.filename).join(', ')}`);

  // Content was already loaded while scoring; only the first 2000 chars are sent.
  const candidateFiles = representativeFiles.map((f) => ({
    path: f.path,
    code: f.code.slice(0, 2000),
  }));

  const { folderSummary, keyFiles } = await summarizeFolder(folderPath, candidateFiles);

  // Stats by file type (falls back to the filename extension, then 'unknown').
  const byType = {};
  for (const f of files) {
    const ext = f.type || path.extname(f.filename || '').replace('.', '') || 'unknown';
    byType[ext] = (byType[ext] || 0) + 1;
  }

  // Directories of matched files that differ from the folder itself.
  // NOTE(review): given the LIKE pattern above these look like nested
  // sub-folders rather than import targets — confirm the intended meaning.
  const importsFrom = new Set();
  for (const f of files) {
    const dir = path.dirname(f.path).replace(/\\/g, '/');
    if (dir !== folderPath) importsFrom.add(dir);
  }

  const capsule = {
    path: folderPath,
    depth: folderPath.split('/').length,
    stats: { fileCount: files.length, byType },
    roles: [],
    concerns: [],
    keyFiles,
    dependencies: { importsFrom: Array.from(importsFrom), usedBy: [] },
    summary: folderSummary,
    confidence: Math.min(0.9, 0.4 + target.score * 0.1),
  };

  // Bound parameters: the capsule JSON contains LLM text that may include
  // quotes, which would break (or inject into) a string-built statement.
  const now = new Date().toISOString();
  db.prepare(`
    INSERT INTO folder_capsules (
      path, depth, capsule_json, confidence, last_generated, source_file_count
    ) VALUES (?, ?, ?, ?, ?, ?)
  `).run(
    capsule.path,
    capsule.depth,
    JSON.stringify(capsule),
    capsule.confidence,
    now,
    files.length,
  );

  db.prepare(`
    UPDATE files
    SET processing_status = 'capsuled'
    WHERE path LIKE ? ESCAPE '\\'
  `).run(folderPattern);

  log(`✅ Folder capsule written: ${folderPath}`);
}

/**
 * Generate "folder capsules" (summary + stats per folder) for the best-ranked
 * folders that do not have one yet, and store them in `folder_capsules`.
 *
 * Steps: check the existing capsule count, scan directories under
 * `process.cwd()`, collect folders from the `files` table without a capsule,
 * rank them heuristically, then build and persist a capsule per folder.
 * A failure in one folder is logged and does not stop the batch.
 *
 * @param {number} [maxFolders=20] - Stop early when at least this many capsules
 *   already exist; also the maximum number of folders processed in this call.
 * @returns {Promise<boolean>} `false` when nothing was attempted (cap reached,
 *   no folders on disk, or no uncapsuled folders); `true` once the batch loop
 *   has run, even if individual folders failed.
 */
export async function runFolderCapsuleBatch(maxFolders = 20) {
  const db = getDbForRepo();
  log('📦 Starting folder capsule batch...');

  // Stop if we already have enough capsules.
  // NOTE(review): the batch below may still add up to `maxFolders` capsules on
  // top of the existing ones — confirm whether the cap is meant to be global.
  const existing = db.prepare(`SELECT COUNT(*) as c FROM folder_capsules`).get();
  log(`📦 Existing folder capsules: ${existing.c}`);
  if (existing.c >= maxFolders) {
    log(`📦 Folder capsule batch: cap reached (${existing.c})`);
    return false;
  }

  // Scan folders with fast-glob.
  log('🔍 Scanning folders...');
  const folderPathsRaw = await fg('**/', {
    cwd: process.cwd(),
    onlyDirectories: true,
    ignore: IGNORED_FOLDER_GLOBS,
    dot: false,
    absolute: true,
  });
  const folderPaths = folderPathsRaw.map(toForwardSlashes);
  log(`📂 Found ${folderPaths.length} folders`);
  if (!folderPaths.length) {
    log('📦 No folders found after filtering');
    return false;
  }

  // Collect folders referenced by the files table that have no capsule yet.
  log('📦 Collecting uncapsuled folders from DB...');
  const uncapsuledFoldersRaw = db.prepare(`
    SELECT folder, COUNT(*) AS fileCount
    FROM (
      SELECT substr(path, 1, length(path) - length(filename) - 1) AS folder, filename, type
      FROM files
      WHERE processing_status != 'skipped'
    )
    LEFT JOIN folder_capsules fc ON fc.path = folder
    WHERE fc.path IS NULL
    GROUP BY folder
  `).all();

  // Set lookup keeps the on-disk membership check linear overall.
  const knownFolders = new Set(folderPaths);
  const uncapsuledFolders = uncapsuledFoldersRaw
    .map((f) => ({ ...f, folder: toForwardSlashes(path.resolve(f.folder)) }))
    .filter((f) => knownFolders.has(f.folder));
  log(`📦 ${uncapsuledFolders.length} uncapsuled folders match actual paths`);
  if (!uncapsuledFolders.length) {
    log('📦 No uncapsuled folders found');
    return false;
  }

  // Rank by heuristic score and keep the top candidates.
  const ranked = uncapsuledFolders
    .map((f) => ({ ...f, score: scoreFolder(f.folder, f.fileCount) }))
    .sort((a, b) => b.score - a.score)
    .slice(0, maxFolders);
  log(`📦 Top ${ranked.length} folders selected for processing`);

  // Process each folder independently so one failure does not abort the batch.
  for (const target of ranked) {
    const folderPath = target.folder;
    if (!folderPath) {
      log(`⚠️ Skipping folder because folderPath is undefined`);
      continue;
    }
    try {
      await buildAndPersistCapsule(db, target);
    } catch (err) {
      logFolderFailure(folderPath, err);
    }
  }

  log('📦 Folder capsule batch complete.');
  return true;
}