UNPKG

scai

Version:

> **AI-powered CLI for local code analysis, commit message suggestions, and natural-language queries.**
> **100% local • No token cost • Private by design • GDPR-friendly** — made in Denmark/EU with ❤️.

278 lines (268 loc) 10.4 kB
// indexCmd.ts
import fg from 'fast-glob';
import path from 'path';
import lockfile from 'proper-lockfile';
import { initSchema } from '../db/schema.js';
import { getDbForRepo, getDbPathForRepo } from '../db/client.js';
import { upsertFileTemplate } from '../db/sqlTemplates.js';
import { detectFileType } from '../fileRules/detectFileType.js';
import { classifyFile } from '../fileRules/classifyFile.js';
import { IGNORED_FOLDER_GLOBS } from '../fileRules/ignoredPaths.js';
import { Config } from '../config.js';
import { log } from '../utils/log.js';
import { startDaemon } from '../commands/DaemonCmd.js';
import { sanitizeQueryForFts } from '../utils/sanitizeQuery.js';
import * as sqlTemplates from '../db/sqlTemplates.js';
import { RELATED_FILES_LIMIT } from '../constants.js';
import { generate } from '../lib/generate.js';
import { cleanupModule } from '../pipeline/modules/cleanupModule.js';
import { logInputOutput } from '../utils/promptLogHelper.js';

/**
 * Take the lock on the repository database path.
 *
 * @returns {Promise<Function>} Whatever `lockfile.lock()` resolves to;
 *   `runIndexCommand` awaits a call to it in order to release the lock.
 * @throws Rethrows the original error after logging it when locking fails.
 */
async function lockDb() {
    try {
        return await lockfile.lock(getDbPathForRepo());
    }
    catch (err) {
        log(`❌ Failed to acquire DB lock: ${err}`);
        throw err;
    }
}

/**
 * Scan the configured index directory and enqueue every file classified as
 * 'valid' by upserting a placeholder row (summary / lastModified / indexedAt
 * all null), then start the daemon.
 *
 * Exits the process with code 1 when schema initialisation fails. A failure
 * on a single file is logged and that file is skipped; the DB lock is always
 * released, even when the loop throws.
 *
 * @returns {Promise<void>}
 */
export async function runIndexCommand() {
    try {
        initSchema();
    }
    catch (err) {
        console.error('❌ Failed to initialize schema:', err);
        process.exit(1);
    }

    const indexDir = Config.getIndexDir() || process.cwd();
    Config.setIndexDir(indexDir);
    log(`📂 Scanning files in: ${indexDir}`);

    const files = await fg('**/*.*', {
        cwd: indexDir,
        ignore: IGNORED_FOLDER_GLOBS,
        absolute: true,
    });

    const db = getDbForRepo();
    const release = await lockDb();
    const countByExt = {};
    let count = 0;
    // Prepared on first use and then reused for every file. Preparation stays
    // inside the per-file try so a failure is reported exactly as before
    // (logged, file skipped) instead of aborting the whole command.
    let upsertStmt = null;

    try {
        for (const file of files) {
            const classification = classifyFile(file);
            if (classification !== 'valid') {
                log(`⏭️ Skipping (${classification}): ${file}`);
                continue;
            }
            try {
                const type = detectFileType(file);
                // Store forward-slash paths regardless of platform separator.
                const normalizedPath = path.normalize(file).replace(/\\/g, '/');
                const filename = path.basename(normalizedPath);

                // --------------------------------------------------
                // Enqueue file for daemon processing
                // --------------------------------------------------
                if (upsertStmt === null) {
                    upsertStmt = db.prepare(upsertFileTemplate);
                }
                upsertStmt.run({
                    path: normalizedPath,
                    filename,
                    summary: null,
                    type,
                    lastModified: null,
                    indexedAt: null,
                });

                const ext = path.extname(file);
                countByExt[ext] = (countByExt[ext] || 0) + 1;
                count++;
            }
            catch (err) {
                log(`⚠️ Skipped in indexCmd ${file}: ${err instanceof Error ? err.message : err}`);
            }
        }
    }
    finally {
        await release();
    }

    log('📊 Discovered files by extension:', JSON.stringify(countByExt, null, 2));
    log(`✅ Done. Enqueued ${count} files for indexing.`);

    // Kick the daemon — it now owns all processing
    startDaemon();
}

// --------------------------------------------------
// QUERY API (read-only, used by CLI / search)
// --------------------------------------------------

/**
 * Run `queryFilesTemplate` with the given query and row limit.
 *
 * @param {string} safeQuery Query text bound as the first statement parameter;
 *   the name suggests callers sanitize it first — this function does not.
 * @param {number} [limit=10] Bound as the second statement parameter.
 * @returns {Array<object>} Rows returned by the statement.
 */
export function queryFiles(safeQuery, limit = 10) {
    const db = getDbForRepo();
    const stmt = db.prepare(sqlTemplates.queryFilesTemplate);
    return stmt.all(safeQuery, limit);
}

/**
 * Run one FTS query per term and add every row whose id is not yet known.
 * Rows already present in `seen` are never overwritten.
 *
 * @param {object} searchStmt Prepared `searchFilesTemplate` statement.
 * @param {string[]} terms Raw terms; each is sanitized before being bound.
 * @param {Map<*, object>} seen Accumulator keyed by row id (mutated in place).
 */
function mergeFtsRowsForTerms(searchStmt, terms, seen) {
    for (const term of terms) {
        const safeTerm = sanitizeQueryForFts(term);
        const rows = searchStmt.all(safeTerm, RELATED_FILES_LIMIT);
        for (const row of rows) {
            if (!seen.has(row.id)) {
                seen.set(row.id, row);
            }
        }
    }
}

// --------------------------------------------------
// searchFiles with semantic relevance check, conditional expansion, fallback on empty FTS, and I/O logging
// --------------------------------------------------

/**
 * Search indexed files for a query.
 *
 * Steps:
 *  1. FTS search on the sanitized `query`.
 *  2. If that returns nothing, ask the model for search terms derived from
 *     `originalQuery` and run FTS once per term.
 *  3. Ask the model whether the primary hits are relevant; if it says no and
 *     offers terms, run FTS once per suggested term.
 *  4. De-duplicate by row id, sort ascending by `bm25Score` (missing scores
 *     count as 0) and keep the first `topK` rows.
 *
 * @param {string} originalQuery Natural-language query shown to the model.
 * @param {string} query Query text used for the primary FTS search.
 * @param {number} [topK=5] Maximum number of results returned.
 * @returns {Promise<Array<object>>} Mapped result rows, or [] when nothing matched.
 */
export async function searchFiles(originalQuery, query, topK = 5) {
    const db = getDbForRepo();
    // One prepared statement serves the primary search and every follow-up pass.
    const searchStmt = db.prepare(sqlTemplates.searchFilesTemplate);

    // -----------------------------
    // Primary FTS search
    // -----------------------------
    const safeQuery = sanitizeQueryForFts(query);
    const primaryResults = searchStmt.all(safeQuery, RELATED_FILES_LIMIT);

    const seen = new Map();
    for (const row of primaryResults) {
        seen.set(row.id, row);
    }

    // Log model input/output
    logInputOutput("searchFiles FTS result", "input", {
        originalQuery,
        primaryResults: primaryResults.map(r => ({ id: r.id, filename: r.filename })),
    });

    // -----------------------------
    // Fallback if primary FTS returns nothing
    // -----------------------------
    if (primaryResults.length === 0) {
        const fallbackTerms = await expandQueryWithModel(originalQuery);
        logInputOutput("searchFiles fallback terms if zero FTS results", "output", {
            originalQuery,
            suggestedTerms: fallbackTerms,
        });
        mergeFtsRowsForTerms(searchStmt, fallbackTerms, seen);
    }

    // -----------------------------
    // Model-assisted relevance check
    // -----------------------------
    // With no primary hits this returns { relevant: false, suggestedTerms: [] }
    // without calling the model, so the secondary search below is skipped.
    const modelAnalysis = await checkFtsRelevanceWithModel(originalQuery, primaryResults);
    logInputOutput("searchFiles relevance analysis", "output", modelAnalysis);

    // -----------------------------
    // Conditional secondary search if FTS not relevant
    // -----------------------------
    if (!modelAnalysis.relevant && modelAnalysis.suggestedTerms.length > 0) {
        mergeFtsRowsForTerms(searchStmt, modelAnalysis.suggestedTerms, seen);
        // Log a plain array rather than the Map itself: a Map has no enumerable
        // own properties, so JSON-based logging would record it as "{}".
        logInputOutput(
            "New searchFiles based on model terms",
            "output",
            Array.from(seen.values(), r => ({ id: r.id, filename: r.filename })),
        );
    }

    if (seen.size === 0) {
        return [];
    }

    // -----------------------------
    // Merge and rank
    // -----------------------------
    // NOTE(review): ascending order presumably puts the best match first, as
    // with SQLite FTS5 bm25() — confirm against searchFilesTemplate.
    const merged = Array.from(seen.values())
        .sort((a, b) => (a.bm25Score ?? 0) - (b.bm25Score ?? 0));
    const finalResults = mapFtsResults(merged, topK);

    // Log combined search results
    logInputOutput(
        "Merged searchFiles result",
        "output",
        finalResults.map(r => ({ id: r.id, filename: r.filename })),
    );
    return finalResults;
}

// --------------------------------------------------
// Helpers
// --------------------------------------------------

/**
 * Keep the first `topK` rows and project each onto the public result shape.
 *
 * @param {Array<object>} rows Rows in their final order.
 * @param {number} topK Maximum number of rows to keep.
 * @returns {Array<{id, path, filename, summary, type, lastModified, bm25Score}>}
 */
function mapFtsResults(rows, topK) {
    return rows.slice(0, topK).map(r => ({
        id: r.id,
        path: r.path,
        filename: r.filename,
        summary: r.summary,
        type: r.type,
        lastModified: r.lastModified,
        bm25Score: r.bm25Score,
    }));
}

/**
 * Send a prompt to the model and pass the reply through `cleanupModule`.
 *
 * @param {string} prompt Full prompt text.
 * @param {string} query Original user query, forwarded to the cleanup step.
 * @returns {Promise<*>} The `data` field of the cleanup result.
 */
async function runModelJsonPrompt(prompt, query) {
    const response = await generate({ content: prompt, query: "" });
    const cleaned = await cleanupModule.run({
        query,
        content: response.data,
    });
    return cleaned.data;
}

/**
 * Ask the model whether the initial FTS hits are relevant to the query.
 * If not, suggest new search terms for another FTS pass.
 *
 * Never throws: an empty hit list, a malformed reply or a model failure all
 * yield `{ relevant: false, suggestedTerms: [] }` (failures are logged).
 *
 * @param {string} query Natural-language query.
 * @param {Array<object>} ftsResults Rows from the primary FTS search; they are
 *   serialized in full into the prompt.
 * @returns {Promise<{relevant: boolean, suggestedTerms: string[]}>}
 */
async function checkFtsRelevanceWithModel(query, ftsResults) {
    if (ftsResults.length === 0) {
        return { relevant: false, suggestedTerms: [] };
    }

    const prompt = `
You are assisting a code search system that uses full-text search (FTS) over source code.

Query (natural language):
"${query}"

Initial FTS results (filenames and summaries):
${JSON.stringify(ftsResults)}

Task:
1. Decide whether these results are relevant to the query.
2. If they are NOT relevant, suggest alternative search terms.

IMPORTANT RULES FOR SUGGESTED TERMS:
- Terms MUST be likely to appear literally in source code.
- Prefer: filenames, module names, function names, variables, symbols, config keys.
- Use short identifiers (1–3 words max).
- Avoid natural-language phrases or explanations.
- Avoid conceptual or architectural descriptions.
- Examples of GOOD terms:
  "api", "router", "frontend", "backend", "client", "server", "routes", "config.ts"
- Examples of BAD terms:
  "frontend backend separation", "code architecture", "business logic"

Output format:
- If relevant:
  { "relevant": true, "suggestedTerms": [] }
- If not relevant:
  { "relevant": false, "suggestedTerms": ["term1", "term2", "term3"] }

Return ONLY valid JSON.
`.trim();

    try {
        const data = await runModelJsonPrompt(prompt, query);

        // Accept the reply only when it has the expected shape; non-string
        // entries in suggestedTerms are dropped.
        const isWellFormed = data !== null
            && typeof data === "object"
            && typeof data.relevant === "boolean"
            && Array.isArray(data.suggestedTerms);
        if (isWellFormed) {
            return {
                relevant: data.relevant,
                suggestedTerms: data.suggestedTerms.filter((t) => typeof t === "string"),
            };
        }
        return { relevant: false, suggestedTerms: [] };
    }
    catch (err) {
        log(`⚠️ [searchFiles] Failed to check FTS relevance: ${String(err)}`);
        return { relevant: false, suggestedTerms: [] };
    }
}

/**
 * Ask the model for literal search terms derived from a natural-language
 * question.
 *
 * Never throws: a non-array reply yields [], and a model failure is logged
 * and also yields [].
 *
 * @param {string} query Natural-language question about the codebase.
 * @returns {Promise<string[]>} String entries of the model's reply.
 */
async function expandQueryWithModel(query) {
    const prompt = `
You are assisting a code search system.

Given a natural-language question about a codebase, return a JSON array of 3–8 concrete search terms that are likely to appear literally in source code.

Rules:
- Return ONLY a JSON array of strings
- No explanations
- Prefer filenames, function names, symbols, library names

Question:
"${query}"
`.trim();

    try {
        const data = await runModelJsonPrompt(prompt, query);
        if (!Array.isArray(data)) {
            return [];
        }
        return data.filter((t) => typeof t === "string");
    }
    catch (err) {
        log(`⚠️ [searchFiles] Failed to expand query: ${String(err)}`);
        return [];
    }
}