scai
Version:
> **AI-powered CLI for local code analysis, commit message suggestions, and natural-language queries.** 100% local, private, GDPR-friendly, made in Denmark/EU with ❤️.
248 lines (243 loc) • 11.3 kB
JavaScript
import fg from 'fast-glob';
import path from 'path';
import { getDbForRepo } from '../db/client.js';
import { log } from '../utils/log.js';
import { generate } from '../lib/generate.js';
import { cleanupModule } from '../pipeline/modules/cleanupModule.js';
import { IGNORED_FOLDER_GLOBS } from '../fileRules/ignoredPaths.js';
/** Filenames matching this pattern get a bonus when picking representative files. */
const ENTRY_POINT_NAME_PATTERN = /index|main|cli|app|server|config/i;

/**
 * Normalize a filesystem path and convert backslashes to forward slashes so
 * paths from fast-glob and from the database can be compared as plain strings.
 *
 * @param {string} p - Path to normalize.
 * @returns {string} Normalized path using `/` as the separator.
 */
function toPosixPath(p) {
  return path.normalize(p).replace(/\\/g, '/');
}

/**
 * Escape the LIKE wildcards (`%`, `_`) and the escape character itself so the
 * value is matched literally by `LIKE ? ESCAPE '\'`.
 *
 * @param {string} value - Raw text that will be embedded in a LIKE pattern.
 * @returns {string} Text with every `\`, `%` and `_` prefixed by a backslash.
 */
function escapeLikePattern(value) {
  return String(value).replace(/[\\%_]/g, '\\$&');
}

/**
 * Heuristic ranking of a folder: more files, shallow depth and "code-like"
 * path segments raise the score; asset-like segments lower it.
 *
 * @param {string} folder - Folder path using `/` separators.
 * @param {number} fileCount - Number of non-skipped files recorded for the folder.
 * @returns {number} Score; higher means the folder is processed earlier.
 */
function scoreFolder(folder, fileCount) {
  let score = 0;
  const depth = folder.split('/').length;
  const lowered = folder.toLowerCase();
  if (fileCount > 5) {
    score += 2;
  }
  if (fileCount > 15) {
    score += 2;
  }
  if (depth <= 6) {
    score += 2;
  }
  if (/src|modules|pipeline|agents|services|commands|lib/.test(lowered)) {
    score += 3;
  }
  if (/css|sql|html|assets/.test(lowered)) {
    score -= 3;
  }
  return score;
}

/**
 * Build the LLM prompt that asks for a one-sentence folder summary plus up to
 * two representative files, answered as JSON.
 *
 * @param {string} folderPath - Folder being summarized.
 * @param {Array<{path: string, code: string}>} candidateFiles - Snippets shown to the model.
 * @returns {string} Prompt text.
 */
function buildFolderPrompt(folderPath, candidateFiles) {
  return [
    'You are analyzing a source code folder.',
    'Folder path:',
    `${folderPath}`,
    'Candidate files with code snippets:',
    `${JSON.stringify(candidateFiles, null, 2)}`,
    'Task:',
    '- Identify up to TWO files that best represent the purpose of this folder.',
    "- Explain the folder's responsibility in ONE concise sentence.",
    '- Return ONLY valid JSON.',
    'Expected JSON shape:',
    '{',
    '"summary": "one sentence description",',
    '"files": [',
    '{ "path": "absolute/file/path", "summary": "optional short note" }',
    ']',
    '}',
  ].join('\n').trim();
}

/**
 * Log a failure for a folder, including the stack trace when one is available.
 *
 * @param {string} folderPath - Folder that was being processed.
 * @param {unknown} err - Caught value.
 */
function logFolderFailure(folderPath, err) {
  if (err instanceof Error) {
    log(`🔥 Failed processing folder ${folderPath}: ${err.message}\n${err.stack}`);
  } else {
    log(`🔥 Failed processing folder ${folderPath}:`, err);
  }
}

/**
 * Generate "folder capsules" (an LLM summary plus stats) for the highest
 * scoring folders that have indexed files but no capsule yet, and persist them
 * in the `folder_capsules` table.
 *
 * All SQL values are passed as bound parameters; LIKE prefixes are escaped so
 * `_` and `%` in folder names match literally.
 *
 * @param {number} [maxFolders=20] - Skip the run when this many capsules already
 *   exist; also the maximum number of folders processed in one call.
 * @returns {Promise<boolean>} `false` when there was nothing to do (cap reached,
 *   no folders on disk, or no uncapsuled folders); `true` once the batch ran,
 *   even if individual folders failed (failures are logged, not thrown).
 */
export async function runFolderCapsuleBatch(maxFolders = 20) {
  const db = getDbForRepo();
  log('📦 Starting folder capsule batch...');

  // --------------------------------------------------
  // Stop if we already have enough capsules
  // --------------------------------------------------
  // NOTE(review): only this pre-check looks at the cap; a run may still add up
  // to `maxFolders` capsules on top of the existing ones. Confirm this is intended.
  const existing = db.prepare(`SELECT COUNT(*) as c FROM folder_capsules`).get();
  log(`📦 Existing folder capsules: ${existing.c}`);
  if (existing.c >= maxFolders) {
    log(`📦 Folder capsule batch: cap reached (${existing.c})`);
    return false;
  }

  // --------------------------------------------------
  // Scan folders with fast-glob
  // --------------------------------------------------
  log('🔍 Scanning folders...');
  const folderPathsRaw = await fg('**/', {
    cwd: process.cwd(),
    onlyDirectories: true,
    ignore: IGNORED_FOLDER_GLOBS,
    dot: false,
    absolute: true,
  });
  const folderPaths = folderPathsRaw.map(toPosixPath);
  log(`📂 Found ${folderPaths.length} folders`);
  if (!folderPaths.length) {
    log('📦 No folders found after filtering');
    return false;
  }
  // Set lookup keeps the membership filter below linear in the number of folders.
  const folderPathSet = new Set(folderPaths);

  // --------------------------------------------------
  // Collect uncapsuled folders from DB
  // --------------------------------------------------
  log('📦 Collecting uncapsuled folders from DB...');
  const uncapsuledFoldersRaw = db.prepare(`
    SELECT folder, COUNT(*) AS fileCount
    FROM (
      SELECT substr(path, 1, length(path) - length(filename) - 1) AS folder,
             filename, type
      FROM files
      WHERE processing_status != 'skipped'
    )
    LEFT JOIN folder_capsules fc ON fc.path = folder
    WHERE fc.path IS NULL
    GROUP BY folder
  `).all();
  const uncapsuledFolders = uncapsuledFoldersRaw
    .map((row) => ({ ...row, folder: toPosixPath(path.resolve(row.folder)) }))
    .filter((row) => folderPathSet.has(row.folder));
  log(`📦 ${uncapsuledFolders.length} uncapsuled folders match actual paths`);
  if (!uncapsuledFolders.length) {
    log('📦 No uncapsuled folders found');
    return false;
  }

  // --------------------------------------------------
  // Heuristic folder scoring
  // --------------------------------------------------
  const ranked = uncapsuledFolders
    .map((row) => ({ ...row, score: scoreFolder(row.folder, row.fileCount) }))
    .sort((a, b) => b.score - a.score)
    .slice(0, maxFolders);
  log(`📦 Top ${ranked.length} folders selected for processing`);

  // --------------------------------------------------
  // Process each top folder with try/catch
  // --------------------------------------------------
  for (const target of ranked) {
    const folderPath = target.folder;
    if (!folderPath) {
      log(`⚠️ Skipping folder because folderPath is undefined`);
      continue;
    }
    try {
      log(`📦 Building folder capsule: ${folderPath} (score=${target.score})`);

      // Statements are prepared inside the try so a schema problem is logged
      // per folder instead of aborting the batch, but only once per folder
      // rather than once per file.
      const selectFolderFiles = db.prepare(`
        SELECT path, filename, type
        FROM files
        WHERE path LIKE ? ESCAPE '\\' AND processing_status != 'skipped'
      `);
      const countIncoming = db.prepare(`
        SELECT COUNT(*) AS c
        FROM files
        WHERE path LIKE ? ESCAPE '\\' AND processing_status != 'skipped' AND content_text LIKE ? ESCAPE '\\'
      `);
      const selectContent = db.prepare(`SELECT content_text FROM files_fts WHERE path = ?`);

      // --------------------------------------------------
      // Load files in folder from DB
      // --------------------------------------------------
      // Prefix match: this also picks up files in nested sub-folders.
      const folderLike = `${escapeLikePattern(folderPath)}/%`;
      const files = selectFolderFiles.all(folderLike);
      log(`📄 ${files.length} files found in folder`);
      if (!files.length) {
        log('⚠️ Folder empty, skipping');
        continue;
      }

      // --------------------------------------------------
      // Compute file-level scores
      // --------------------------------------------------
      const fileScores = files.map((file) => {
        const heuristicName = ENTRY_POINT_NAME_PATTERN.test(file.filename) ? 5 : 0;
        // "Incoming" = files under this folder whose text mentions this file's basename.
        const basenameLike = `%${escapeLikePattern(path.basename(file.path))}%`;
        const incoming = countIncoming.get(folderLike, basenameLike);
        const incomingScore = (incoming?.c || 0) * 2;
        // The column is `content_text`; rows are keyed by column name.
        const code = selectContent.get(file.path)?.content_text || '';
        const outgoingCount =
          (code.match(/from\s+['"].+['"]/g)?.length || 0) +
          (code.match(/require\(['"].+['"]\)/g)?.length || 0);
        const sizeScore = Math.min(code.length / 2000, 3);
        const totalScore = heuristicName + incomingScore + outgoingCount + sizeScore;
        log(`📄 File score: ${file.filename} -> ${totalScore}`);
        return { ...file, score: totalScore, code };
      });
      const representativeFiles = fileScores.sort((a, b) => b.score - a.score).slice(0, 2);
      log(`📌 Representative files: ${representativeFiles.map((f) => f.filename).join(', ')}`);
      // Reuse the content fetched during scoring; only the first 2000 chars go to the LLM.
      const candidateFiles = representativeFiles.map((f) => ({
        path: f.path,
        code: f.code.slice(0, 2000),
      }));

      // --------------------------------------------------
      // Ask LLM to generate folder summary
      // --------------------------------------------------
      const prompt = buildFolderPrompt(folderPath, candidateFiles);
      let folderSummary = '';
      let keyFiles = [];
      try {
        log('🤖 Asking LLM for folder summary...');
        const response = await generate({ content: prompt, query: '' });
        const cleaned = await cleanupModule.run({ query: '', content: response.data });
        const data = typeof cleaned.data === 'string' ? JSON.parse(cleaned.data) : cleaned.data;
        if (data && typeof data === 'object') {
          if (typeof data.summary === 'string') {
            folderSummary = data.summary;
          }
          if (Array.isArray(data.files)) {
            keyFiles = data.files
              .filter((f) => typeof f?.path === 'string')
              .slice(0, 2)
              .map((f) => ({ path: f.path, reason: f.summary || f.reason || 'representative file' }));
          }
        }
        log(`📌 Folder summary: ${folderSummary}`);
      } catch (err) {
        // Best effort: an LLM or JSON failure still yields a capsule with an empty summary.
        logFolderFailure(folderPath, err);
      }

      // --------------------------------------------------
      // Compute stats by file type
      // --------------------------------------------------
      const byType = {};
      for (const f of files) {
        const ext = f.type || path.extname(f.filename || '').replace('.', '') || 'unknown';
        byType[ext] = (byType[ext] || 0) + 1;
      }

      // --------------------------------------------------
      // Folder-level dependencies
      // --------------------------------------------------
      // Collects the directories of matched files that differ from the folder itself.
      const importsFrom = new Set();
      for (const f of files) {
        const dir = path.dirname(f.path).replace(/\\/g, '/');
        if (dir !== folderPath) {
          importsFrom.add(dir);
        }
      }

      // --------------------------------------------------
      // Build capsule
      // --------------------------------------------------
      const capsule = {
        path: folderPath,
        depth: folderPath.split('/').length,
        stats: { fileCount: files.length, byType },
        roles: [],
        concerns: [],
        keyFiles,
        dependencies: { importsFrom: Array.from(importsFrom), usedBy: [] },
        summary: folderSummary,
        confidence: Math.min(0.9, 0.4 + target.score * 0.1),
      };

      // --------------------------------------------------
      // Persist capsule
      // --------------------------------------------------
      // Bound parameters: the capsule JSON may contain quotes that would break inlined SQL.
      const now = new Date().toISOString();
      db.prepare(`
        INSERT INTO folder_capsules (
          path, depth, capsule_json, confidence, last_generated, source_file_count
        )
        VALUES (?, ?, ?, ?, ?, ?)
      `).run(capsule.path, capsule.depth, JSON.stringify(capsule), capsule.confidence, now, files.length);
      db.prepare(`
        UPDATE files
        SET processing_status = 'capsuled'
        WHERE path LIKE ? ESCAPE '\\'
      `).run(folderLike);
      log(`✅ Folder capsule written: ${folderPath}`);
    } catch (err) {
      logFolderFailure(folderPath, err);
    }
  }
  log('📦 Folder capsule batch complete.');
  return true;
}