scai
Version:
> **AI-powered CLI for local code analysis, commit message suggestions, and natural-language queries.** > **100% local • No token cost • Private by design • GDPR-friendly** — made in Denmark/EU with ❤️.
278 lines (268 loc) • 10.4 kB
JavaScript
// indexCmd.ts
import fg from 'fast-glob';
import path from 'path';
import lockfile from 'proper-lockfile';
import { initSchema } from '../db/schema.js';
import { getDbForRepo, getDbPathForRepo } from '../db/client.js';
import { upsertFileTemplate } from '../db/sqlTemplates.js';
import { detectFileType } from '../fileRules/detectFileType.js';
import { classifyFile } from '../fileRules/classifyFile.js';
import { IGNORED_FOLDER_GLOBS } from '../fileRules/ignoredPaths.js';
import { Config } from '../config.js';
import { log } from '../utils/log.js';
import { startDaemon } from '../commands/DaemonCmd.js';
import { sanitizeQueryForFts } from '../utils/sanitizeQuery.js';
import * as sqlTemplates from '../db/sqlTemplates.js';
import { RELATED_FILES_LIMIT } from '../constants.js';
import { generate } from '../lib/generate.js';
import { cleanupModule } from '../pipeline/modules/cleanupModule.js';
import { logInputOutput } from '../utils/promptLogHelper.js';
/**
 * Take the file lock on the repo database path.
 *
 * @returns {Promise<*>} Whatever `lockfile.lock` resolves with; `runIndexCommand`
 *   awaits a call to it in order to release the lock.
 * @throws Re-throws any error raised while resolving the path or locking,
 *   after logging it.
 */
async function lockDb() {
  let release;
  try {
    // Path resolution stays inside the try so its failures are logged too.
    release = await lockfile.lock(getDbPathForRepo());
  }
  catch (lockError) {
    log(`❌ Failed to acquire DB lock: ${lockError}`);
    throw lockError;
  }
  return release;
}
/**
 * Scan the index directory and enqueue every valid file for daemon processing.
 *
 * Flow:
 *  1. Initialise the schema; on failure print the error and exit with code 1.
 *  2. Resolve the index directory (configured value, else the current working
 *     directory) and store it back in the config.
 *  3. Glob for files, then, while holding the DB lock, upsert one placeholder
 *     row per file classified as 'valid'.
 *  4. Log a per-extension summary and start the daemon.
 *
 * A failure on a single file is logged and that file is skipped; the lock is
 * always released, even if the loop throws.
 *
 * @returns {Promise<void>}
 */
export async function runIndexCommand() {
  try {
    initSchema();
  }
  catch (err) {
    console.error('❌ Failed to initialize schema:', err);
    process.exit(1);
  }
  const indexDir = Config.getIndexDir() || process.cwd();
  Config.setIndexDir(indexDir);
  log(`📂 Scanning files in: ${indexDir}`);
  const files = await fg('**/*.*', {
    cwd: indexDir,
    ignore: IGNORED_FOLDER_GLOBS,
    absolute: true,
  });
  const db = getDbForRepo();
  const release = await lockDb();
  const countByExt = {};
  let count = 0;
  // Prepared on first use and then reused for every file. It is created inside
  // the per-file try so a prepare failure is still reported as a skipped file.
  let upsertStmt = null;
  try {
    for (const file of files) {
      const classification = classifyFile(file);
      if (classification !== 'valid') {
        log(`⏭️ Skipping (${classification}): ${file}`);
        continue;
      }
      try {
        const type = detectFileType(file);
        // Store forward-slash paths regardless of platform separator.
        const normalizedPath = path.normalize(file).replace(/\\/g, '/');
        const filename = path.basename(normalizedPath);
        // --------------------------------------------------
        // Enqueue file for daemon processing
        // --------------------------------------------------
        if (upsertStmt === null) {
          upsertStmt = db.prepare(upsertFileTemplate);
        }
        // summary / lastModified / indexedAt are written as null here.
        upsertStmt.run({
          path: normalizedPath,
          filename,
          summary: null,
          type,
          lastModified: null,
          indexedAt: null,
        });
        const ext = path.extname(file);
        countByExt[ext] = (countByExt[ext] || 0) + 1;
        count++;
      }
      catch (err) {
        log(`⚠️ Skipped in indexCmd ${file}: ${err instanceof Error ? err.message : err}`);
      }
    }
  }
  finally {
    await release();
  }
  // NOTE(review): log() is called with two arguments here but with one elsewhere
  // in this file — confirm it forwards the second argument.
  log('📊 Discovered files by extension:', JSON.stringify(countByExt, null, 2));
  log(`✅ Done. Enqueued ${count} files for indexing.`);
  // Kick the daemon — it now owns all processing
  startDaemon();
}
// --------------------------------------------------
// QUERY API (read-only, used by CLI / search)
// --------------------------------------------------
/**
 * Run the `queryFilesTemplate` statement against the repo database.
 *
 * @param {string} safeQuery - Query text, bound as the first statement parameter.
 * @param {number} [limit=10] - Bound as the second statement parameter.
 * @returns {Array} Rows returned by the statement's `.all()` call.
 */
export function queryFiles(safeQuery, limit = 10) {
  const statement = getDbForRepo().prepare(sqlTemplates.queryFilesTemplate);
  return statement.all(safeQuery, limit);
}
// --------------------------------------------------
// searchFiles with semantic relevance check, conditional expansion, fallback on empty FTS, and I/O logging
// --------------------------------------------------
/**
 * Full-text search over indexed files, with model-assisted recovery.
 *
 * 1. Run the sanitized `query` through the FTS statement.
 * 2. If that returns nothing, ask the model for fallback terms derived from
 *    `originalQuery` and search each of them.
 * 3. Ask the model whether the primary hits are relevant; if it says no and
 *    offers terms, search those too.
 * 4. De-duplicate by row id, sort ascending by `bm25Score` (missing scores
 *    count as 0) and return at most `topK` mapped rows.
 *
 * @param {string} originalQuery - Natural-language query passed to the model helpers and logs.
 * @param {string} query - Text that is sanitized and matched against the FTS index.
 * @param {number} [topK=5] - Maximum number of results to return.
 * @returns {Promise<Array>} Mapped result rows, or `[]` when nothing matched.
 */
export async function searchFiles(originalQuery, query, topK = 5) {
  const db = getDbForRepo();
  // -----------------------------
  // Primary FTS search
  // -----------------------------
  const safeQuery = sanitizeQueryForFts(query);
  // Prepared once: the same statement serves the primary query and every
  // expansion term below.
  const ftsStatement = db.prepare(sqlTemplates.searchFilesTemplate);
  const primaryResults = ftsStatement.all(safeQuery, RELATED_FILES_LIMIT);
  const seen = new Map();
  primaryResults.forEach(r => seen.set(r.id, r));
  // Log model input/output
  logInputOutput("searchFiles FTS result", "input", {
    originalQuery,
    primaryResults: primaryResults.map(r => ({ id: r.id, filename: r.filename })),
  });
  // -----------------------------
  // Fallback if primary FTS returns nothing
  // -----------------------------
  if (primaryResults.length === 0) {
    const fallbackTerms = await expandQueryWithModel(originalQuery);
    logInputOutput("searchFiles fallback terms if zero FTS results", "output", {
      originalQuery,
      suggestedTerms: fallbackTerms,
    });
    addUnseenFtsMatches(ftsStatement, fallbackTerms, seen);
  }
  // -----------------------------
  // Model-assisted relevance check
  // -----------------------------
  // Only the primary hits are judged, not rows added by the fallback pass.
  const modelAnalysis = await checkFtsRelevanceWithModel(originalQuery, primaryResults);
  logInputOutput("searchFiles relevance analysis", "output", modelAnalysis);
  // -----------------------------
  // Conditional secondary search if FTS not relevant
  // -----------------------------
  if (!modelAnalysis.relevant && modelAnalysis.suggestedTerms.length > 0) {
    addUnseenFtsMatches(ftsStatement, modelAnalysis.suggestedTerms, seen);
    // A Map has no enumerable own properties, so JSON.stringify would render
    // it as {}; log a plain array shaped like the other log calls instead.
    logInputOutput("New searchFiles based on model terms", "output", Array.from(seen.values()).map(r => ({ id: r.id, filename: r.filename })));
  }
  if (seen.size === 0)
    return [];
  // -----------------------------
  // Merge and rank
  // -----------------------------
  // Ascending order; presumably bm25Score comes from SQLite FTS5 bm25(), where
  // a smaller value is a better match — confirm against searchFilesTemplate.
  const merged = Array.from(seen.values()).sort((a, b) => (a.bm25Score ?? 0) - (b.bm25Score ?? 0));
  const finalResults = mapFtsResults(merged, topK);
  // Log combined search results
  logInputOutput("Merged searchFiles result", "output", finalResults.map(r => ({ id: r.id, filename: r.filename })));
  return finalResults;
}
/**
 * Run each term through the FTS statement and record rows whose id is not
 * already in `seen`. Existing entries are never overwritten.
 *
 * @param {{all: Function}} statement - Prepared FTS statement.
 * @param {string[]} terms - Raw terms; each is sanitized before being bound.
 * @param {Map} seen - Accumulator keyed by row id; mutated in place.
 */
function addUnseenFtsMatches(statement, terms, seen) {
  for (const term of terms) {
    const rows = statement.all(sanitizeQueryForFts(term), RELATED_FILES_LIMIT);
    for (const row of rows) {
      if (!seen.has(row.id)) {
        seen.set(row.id, row);
      }
    }
  }
}
// --------------------------------------------------
// Helpers
// --------------------------------------------------
/**
 * Keep the first `topK` rows and project each onto the public result shape
 * (id, path, filename, summary, type, lastModified, bm25Score).
 * Any other fields on a row are dropped.
 *
 * @param {Array<object>} rows - Rows to trim and project.
 * @param {number} topK - Upper bound passed to `slice(0, topK)`.
 * @returns {Array<object>} New array of projected rows.
 */
function mapFtsResults(rows, topK) {
  const toSearchHit = (row) => {
    const { id, filename, summary, type, lastModified, bm25Score } = row;
    return { id, path: row.path, filename, summary, type, lastModified, bm25Score };
  };
  const head = rows.slice(0, topK);
  return head.map(toSearchHit);
}
/**
 * Have the model judge whether the first-pass FTS hits answer the query and,
 * when they do not, propose replacement search terms.
 *
 * No model call is made for an empty hit list. Any malformed model output or
 * thrown error yields `{ relevant: false, suggestedTerms: [] }`.
 *
 * @param {string} query - Natural-language query shown to the model.
 * @param {Array<object>} ftsResults - FTS rows, serialized into the prompt as JSON.
 * @returns {Promise<{relevant: boolean, suggestedTerms: string[]}>}
 */
async function checkFtsRelevanceWithModel(query, ftsResults) {
  // Built fresh on every use so no two callers share one fallback object.
  const noVerdict = () => ({ relevant: false, suggestedTerms: [] });
  if (ftsResults.length === 0) {
    return noVerdict();
  }
  const prompt = `
You are assisting a code search system that uses full-text search (FTS)
over source code.
Query (natural language):
"${query}"
Initial FTS results (filenames and summaries):
${JSON.stringify(ftsResults)}
Task:
1. Decide whether these results are relevant to the query.
2. If they are NOT relevant, suggest alternative search terms.
IMPORTANT RULES FOR SUGGESTED TERMS:
- Terms MUST be likely to appear literally in source code.
- Prefer: filenames, module names, function names, variables, symbols, config keys.
- Use short identifiers (1–3 words max).
- Avoid natural-language phrases or explanations.
- Avoid conceptual or architectural descriptions.
- Examples of GOOD terms: "api", "router", "frontend", "backend", "client", "server", "routes", "config.ts"
- Examples of BAD terms: "frontend backend separation", "code architecture", "business logic"
Output format:
- If relevant:
{ "relevant": true, "suggestedTerms": [] }
- If not relevant:
{ "relevant": false, "suggestedTerms": ["term1", "term2", "term3"] }
Return ONLY valid JSON.
`.trim();
  try {
    const reply = await generate({ content: prompt, query: "" });
    const cleaned = await cleanupModule.run({
      query,
      content: reply.data,
    });
    const verdict = cleaned.data;
    // Reject anything that is not an object carrying both expected fields.
    if (!verdict || typeof verdict !== "object") {
      return noVerdict();
    }
    if (typeof verdict.relevant !== "boolean" || !Array.isArray(verdict.suggestedTerms)) {
      return noVerdict();
    }
    // Non-string entries in the model's term list are discarded.
    return {
      relevant: verdict.relevant,
      suggestedTerms: verdict.suggestedTerms.filter((term) => typeof term === "string"),
    };
  }
  catch (err) {
    log(`⚠️ [searchFiles] Failed to check FTS relevance: ${String(err)}`);
    return noVerdict();
  }
}
/**
 * Ask the model to turn a natural-language question into literal search terms.
 *
 * @param {string} query - The question, embedded in the prompt in double quotes.
 * @returns {Promise<string[]>} The string entries of the model's cleaned array
 *   output; `[]` when the output is not an array or any step throws (the
 *   error is logged).
 */
async function expandQueryWithModel(query) {
  const prompt = [
    "You are assisting a code search system.",
    "Given a natural-language question about a codebase, return a JSON array",
    "of 3–8 concrete search terms that are likely to appear literally in source code.",
    "Rules:",
    "- Return ONLY a JSON array of strings",
    "- No explanations",
    "- Prefer filenames, function names, symbols, library names",
    "Question:",
    `"${query}"`,
  ].join("\n");
  try {
    const reply = await generate({ content: prompt, query: "" });
    const cleaned = await cleanupModule.run({ query, content: reply.data });
    if (!Array.isArray(cleaned.data)) {
      return [];
    }
    // Keep only the string entries of the model's answer.
    return cleaned.data.filter((term) => typeof term === "string");
  }
  catch (err) {
    log(`⚠️ [searchFiles] Failed to expand query: ${String(err)}`);
    return [];
  }
}