scai
> AI-powered CLI tool for commit messages **and** pull request reviews, using local models.
import { indexFunctionsForFile } from '../db/functionIndex.js';
import fs from 'fs/promises';
import fsSync from 'fs';
import { generateEmbedding } from '../lib/generateEmbedding.js';
import { log } from '../utils/log.js';
import lockfile from 'proper-lockfile';
import { summaryModule } from '../pipeline/modules/summaryModule.js';
import { classifyFile } from '../fileRules/classifyFile.js';
import { getDbForRepo, getDbPathForRepo } from '../db/client.js';
import { markFileAsSkippedByPath, selectUnprocessedFiles, updateFileWithSummaryAndEmbedding } from '../db/sqlTemplates.js';
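// Cap on how many files a single daemon batch will pick up and process.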
const MAX_FILES_PER_BATCH = 5;
/**
* Acquires a lock on the database to ensure that only one daemon batch
* can modify it at a time.
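*
* proper-lockfile's lock() resolves to a release function, which the
* caller must invoke once the batch is finished (see runDaemonBatch).
*
* @example
* // Illustrative usage sketch (assumed caller, not exported from this module):
* const release = await lockDb();
* try {
*   // ... mutate the database ...
* } finally {
*   await release();
* }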
*/
async function lockDb() {
try {
return await lockfile.lock(getDbPathForRepo());
}
catch (err) {
log('❌ Failed to acquire DB lock: ' + err);
throw err;
}
}
/**
* Runs a daemon batch to process up to MAX_FILES_PER_BATCH unprocessed files.
* This includes:
* - Verifying file existence and validity
* - Generating summaries and embeddings if needed
* - Extracting functions from source files
* - Marking skipped files as necessary
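*
* Returns true when a batch was run, or false when there are no
* unprocessed files left.
*
* @example
* // Illustrative driver loop (assumed caller, not defined in this module):
* while (await runDaemonBatch()) {
*   // keep draining batches until no unprocessed files remain
* }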
*/
export async function runDaemonBatch() {
log('📡 Starting daemon batch...');
// Selects up to MAX_FILES_PER_BATCH files that haven't been processed yet
const db = getDbForRepo();
const rows = db.prepare(selectUnprocessedFiles).all(MAX_FILES_PER_BATCH);
if (rows.length === 0) {
log('✅ No files left to process.');
return false;
}
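// Hold the DB lock for the whole batch so only one daemon mutates the DB at a time.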
const release = await lockDb();
for (const row of rows) {
log(`📄 Processing file: ${row.path}`);
// Skip if file is missing from the file system
if (!fsSync.existsSync(row.path)) {
log(`⚠️ Skipped missing file: ${row.path}`);
db.prepare(markFileAsSkippedByPath).run({ path: row.path });
continue;
}
// Skip if file is classified as something we don't process
const classification = classifyFile(row.path);
if (classification !== 'valid') {
log(`⏭️ Skipping (${classification}): ${row.path}`);
db.prepare(markFileAsSkippedByPath).run({ path: row.path });
continue;
}
try {
const content = await fs.readFile(row.path, 'utf-8');
// Determine whether the file needs to be re-summarized
const needsResummary = !row.summary ||
!row.indexed_at ||
(row.last_modified && new Date(row.last_modified) > new Date(row.indexed_at));
if (needsResummary) {
log(`📝 Generating summary for ${row.path}...`);
// Generate a summary using the summary pipeline
const summaryResult = await summaryModule.run({ content, filepath: row.path });
const summary = summaryResult?.summary?.trim() || null;
let embedding = null;
// Generate an embedding from the summary (if present)
if (summary) {
const vector = await generateEmbedding(summary);
if (vector) {
embedding = JSON.stringify(vector);
}
}
// Update the file record with the new summary and embedding
db.prepare(updateFileWithSummaryAndEmbedding).run({
summary,
embedding,
path: row.path,
});
log(`✅ Updated summary & embedding for ${row.path}`);
}
else {
log(`⚡ Skipped summary (up-to-date) for ${row.path}`);
}
// Extract top-level functions from the file and update the DB
const extracted = await indexFunctionsForFile(row.path, row.id);
if (extracted) {
log(`✅ Function extraction complete for ${row.path}\n`);
}
else {
log(`ℹ️ No functions extracted for ${row.path}\n`);
}
}
catch (err) {
log(`❌ Failed: ${row.path}: ${err instanceof Error ? err.message : String(err)}\n`);
}
// Add a small delay to throttle processing
await new Promise(resolve => setTimeout(resolve, 200));
}
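// Release the DB lock now that the whole batch has been processed.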
await release();
log('✅ Finished daemon batch.\n\n');
return true;
}