scai
Version:
> **AI-powered CLI for local code analysis, commit message suggestions, and natural-language queries.** 100% local, private, GDPR-friendly, made in Denmark/EU with ❤️.
184 lines (180 loc) • 6.42 kB
JavaScript
// indexCmd.ts
import fg from 'fast-glob';
import path from 'path';
import lockfile from 'proper-lockfile';
import { initSchema } from '../db/schema.js';
import { getDbForRepo, getDbPathForRepo } from '../db/client.js';
import { upsertFileTemplate } from '../db/sqlTemplates.js';
import { detectFileType } from '../fileRules/detectFileType.js';
import { classifyFile } from '../fileRules/classifyFile.js';
import { IGNORED_FOLDER_GLOBS } from '../fileRules/ignoredPaths.js';
import { Config } from '../config.js';
import { log } from '../utils/log.js';
import { startDaemon } from '../commands/DaemonCmd.js';
import { sanitizeQueryForFts } from '../utils/sanitizeQuery.js';
import * as sqlTemplates from '../db/sqlTemplates.js';
import { RELATED_FILES_LIMIT } from '../constants.js';
import { generate } from '../lib/generate.js';
import { cleanupModule } from '../pipeline/modules/cleanupModule.js';
/**
 * Takes the file lock on the repository database path.
 *
 * The path comes from `getDbPathForRepo()`. If resolving the path or taking
 * the lock fails, the failure is logged and the same error is rethrown.
 *
 * @returns {Promise<*>} The value `lockfile.lock` resolves with. The caller in
 *   this file invokes it as an async function to release the lock.
 * @throws Rethrows whatever `getDbPathForRepo` or `lockfile.lock` threw.
 */
async function lockDb() {
  let release;
  try {
    const dbPath = getDbPathForRepo();
    release = await lockfile.lock(dbPath);
  } catch (lockError) {
    log('❌ Failed to acquire DB lock: ' + lockError);
    throw lockError;
  }
  return release;
}
/**
 * Scans the index directory and records every valid file in the repository
 * database so the daemon can process it.
 *
 * Flow:
 *  - Initialises the schema; if that throws, the error is printed and the
 *    process exits with code 1.
 *  - Uses the configured index directory, falling back to the current working
 *    directory, and writes the chosen value back to Config.
 *  - Globs the directory (honouring IGNORED_FOLDER_GLOBS) and, while holding
 *    the DB lock, upserts one row per file that classifyFile reports as
 *    'valid'. summary, lastModified and indexedAt are written as null.
 *  - Releases the lock, logs per-extension totals and starts the daemon.
 *
 * A failure on an individual file is logged and that file is skipped; it does
 * not abort the run.
 *
 * @returns {Promise<void>}
 * @throws Propagates errors from globbing, opening the database, or acquiring
 *   the lock (lockDb rethrows after logging).
 */
export async function runIndexCommand() {
  try {
    initSchema();
  } catch (err) {
    console.error('❌ Failed to initialize schema:', err);
    process.exit(1);
  }

  const indexDir = Config.getIndexDir() || process.cwd();
  Config.setIndexDir(indexDir);
  log(`📂 Scanning files in: ${indexDir}`);

  const files = await fg('**/*.*', {
    cwd: indexDir,
    ignore: IGNORED_FOLDER_GLOBS,
    absolute: true,
  });

  const db = getDbForRepo();
  const release = await lockDb();
  const countByExt = {};
  let count = 0;

  // Prepared on first use and then reused for every file, instead of being
  // re-prepared on each iteration. It is created inside the per-file try so a
  // failure to prepare is still reported as a skipped file rather than
  // aborting the whole run.
  let upsertStatement = null;

  try {
    for (const file of files) {
      const classification = classifyFile(file);
      if (classification !== 'valid') {
        log(`⏭️ Skipping (${classification}): ${file}`);
        continue;
      }
      try {
        const type = detectFileType(file);
        // Store forward-slash paths regardless of platform separator.
        const normalizedPath = path.normalize(file).replace(/\\/g, '/');
        const filename = path.basename(normalizedPath);
        // --------------------------------------------------
        // Enqueue file for daemon processing
        // --------------------------------------------------
        if (upsertStatement === null) {
          upsertStatement = db.prepare(upsertFileTemplate);
        }
        upsertStatement.run({
          path: normalizedPath,
          filename,
          summary: null,
          type,
          lastModified: null,
          indexedAt: null,
        });
        const ext = path.extname(file);
        countByExt[ext] = (countByExt[ext] || 0) + 1;
        count++;
      } catch (err) {
        log(`⚠️ Skipped in indexCmd ${file}: ${err instanceof Error ? err.message : err}`);
      }
    }
  } finally {
    // Always give the lock back, even if classification or logging threw.
    await release();
  }

  log('📊 Discovered files by extension:', JSON.stringify(countByExt, null, 2));
  log(`✅ Done. Enqueued ${count} files for indexing.`);
  // Kick the daemon — it now owns all processing
  startDaemon();
}
// --------------------------------------------------
// QUERY API (read-only, used by CLI / search)
// --------------------------------------------------
/**
 * Runs the `queryFilesTemplate` statement against the repository database.
 *
 * @param {string} safeQuery - Bound as the first statement parameter. It is
 *   passed through untouched, so the caller is presumably expected to have
 *   sanitised it already (verify against callers).
 * @param {number} [limit=10] - Bound as the second statement parameter.
 * @returns {Array<object>} The rows returned by the statement.
 */
export function queryFiles(safeQuery, limit = 10) {
  const statement = getDbForRepo().prepare(sqlTemplates.queryFilesTemplate);
  return statement.all(safeQuery, limit);
}
// --------------------------------------------------
// searchFiles with semantic fallback
// --------------------------------------------------
/**
 * Full-text search over indexed files, with a model-assisted fallback.
 *
 * 1. The sanitised query is run through `searchFilesTemplate`. If it yields
 *    any rows, the first `topK` are returned.
 * 2. Otherwise the query is expanded into literal search terms via
 *    `expandQueryWithModel`, each term is searched separately, and the union
 *    of the hits is returned ordered by bm25Score (lower = more relevant).
 *
 * @param {string} query - Natural-language or keyword query.
 * @param {number} [topK=5] - Maximum number of results to return.
 * @returns {Promise<Array<object>>} Results shaped by `mapFtsResults`; an
 *   empty array when neither pass finds anything.
 */
export async function searchFiles(query, topK = 5) {
  const db = getDbForRepo();
  // -----------------------------
  // Primary FTS search
  // -----------------------------
  const safeQuery = sanitizeQueryForFts(query);
  // Both passes execute the same template, so prepare it once and reuse it
  // instead of re-preparing for every expanded term.
  const searchStatement = db.prepare(sqlTemplates.searchFilesTemplate);
  const primaryResults = searchStatement.all(safeQuery, RELATED_FILES_LIMIT);
  if (primaryResults.length > 0) {
    return mapFtsResults(primaryResults, topK);
  }

  // -----------------------------
  // Fallback: model-assisted expansion
  // -----------------------------
  const expandedTerms = await expandQueryWithModel(query);
  if (expandedTerms.length === 0) {
    return [];
  }
  log(`🔁 [searchFiles] Fallback used for "${query}". Expanded terms: ${expandedTerms.join(", ")}`);

  // A file can match several terms with different scores. Keep its best
  // (lowest) score so the final ranking does not depend on the order in which
  // the model happened to list the terms.
  const bestById = new Map();
  for (const term of expandedTerms) {
    const safeTerm = sanitizeQueryForFts(term);
    const rows = searchStatement.all(safeTerm, RELATED_FILES_LIMIT);
    for (const row of rows) {
      const existing = bestById.get(row.id);
      if (existing === undefined || row.bm25Score < existing.bm25Score) {
        bestById.set(row.id, row);
      }
    }
  }
  if (bestById.size === 0) {
    return [];
  }

  const merged = Array.from(bestById.values()).sort(
    (a, b) => a.bm25Score - b.bm25Score, // lower = more relevant
  );
  return mapFtsResults(merged, topK);
}
// --------------------------------------------------
// Helpers
// --------------------------------------------------
/**
 * Projects raw search rows onto the public result shape.
 *
 * Only the first `topK` rows are kept, and any column other than the seven
 * copied below is dropped.
 *
 * @param {Array<object>} rows - Rows as returned by the search statement.
 * @param {number} topK - Maximum number of rows to keep.
 * @returns {Array<{id, path, filename, summary, type, lastModified, bm25Score}>}
 */
function mapFtsResults(rows, topK) {
  const toResult = (row) => {
    const { id, path: filePath, filename, summary, type, lastModified, bm25Score } = row;
    return { id, path: filePath, filename, summary, type, lastModified, bm25Score };
  };
  return rows.slice(0, topK).map((row) => toResult(row));
}
/**
 * Asks the model to turn a natural-language question into literal search
 * terms.
 *
 * The prompt is sent through `generate`, and the raw reply is handed to
 * `cleanupModule.run`. If the cleaned `data` is an array, its string entries
 * are returned; anything else yields an empty list.
 *
 * Never throws: any failure along the way is logged and an empty array is
 * returned.
 *
 * @param {string} query - The user's original question.
 * @returns {Promise<string[]>} Expanded search terms, possibly empty.
 */
async function expandQueryWithModel(query) {
  const prompt = `
You are assisting a code search system.
Given a natural-language question about a codebase, return a JSON array
of 3–8 concrete search terms that are likely to appear literally in source code.
Rules:
- Return ONLY a JSON array of strings
- No explanations
- Prefer filenames, function names, symbols, library names
Question:
"${query}"
`.trim();

  try {
    const generated = await generate({ content: prompt, query: "" });
    const cleaned = await cleanupModule.run({ query, content: generated.data });
    if (!Array.isArray(cleaned.data)) {
      return [];
    }
    // Drop any non-string entries in the cleaned array.
    return cleaned.data.filter((candidate) => typeof candidate === "string");
  } catch (expansionError) {
    log(`⚠️ [searchFiles] Failed to expand query "${query}": ${String(expansionError)}`);
    return [];
  }
}