UNPKG

scai

Version:

> **AI-powered CLI for local code analysis, commit message suggestions, and natural-language queries.** 100% local, private, GDPR-friendly, made in Denmark/EU with ❤️.

184 lines (180 loc) 6.42 kB
// indexCmd.ts
import fg from 'fast-glob';
import path from 'path';
import lockfile from 'proper-lockfile';
import { initSchema } from '../db/schema.js';
import { getDbForRepo, getDbPathForRepo } from '../db/client.js';
import { upsertFileTemplate } from '../db/sqlTemplates.js';
import { detectFileType } from '../fileRules/detectFileType.js';
import { classifyFile } from '../fileRules/classifyFile.js';
import { IGNORED_FOLDER_GLOBS } from '../fileRules/ignoredPaths.js';
import { Config } from '../config.js';
import { log } from '../utils/log.js';
import { startDaemon } from '../commands/DaemonCmd.js';
import { sanitizeQueryForFts } from '../utils/sanitizeQuery.js';
import * as sqlTemplates from '../db/sqlTemplates.js';
import { RELATED_FILES_LIMIT } from '../constants.js';
import { generate } from '../lib/generate.js';
import { cleanupModule } from '../pipeline/modules/cleanupModule.js';

/**
 * Acquire a `proper-lockfile` lock on the path returned by `getDbPathForRepo()`.
 *
 * @returns {Promise<Function>} Whatever `lockfile.lock` resolves to; the caller
 *   in this file awaits it as the release function.
 * @throws Rethrows the original lock error after logging it.
 */
async function lockDb() {
  try {
    return await lockfile.lock(getDbPathForRepo());
  } catch (err) {
    log(`❌ Failed to acquire DB lock: ${err}`);
    throw err;
  }
}

/**
 * Discover files under the index directory and enqueue them for the daemon.
 *
 * Steps, in order:
 *  1. Initialise the schema (exits the process with code 1 on failure).
 *  2. Resolve the index directory (`Config.getIndexDir()` or the current
 *     working directory) and store it back via `Config.setIndexDir`.
 *  3. Glob `**\/*.*` beneath it, honouring `IGNORED_FOLDER_GLOBS`.
 *  4. While holding the DB lock, upsert one row per file that `classifyFile`
 *     reports as `'valid'`, with `summary`, `lastModified` and `indexedAt`
 *     set to `null`.
 *  5. Release the lock, log the totals and call `startDaemon()`.
 *
 * A failure on an individual file is logged and that file is skipped; it does
 * not abort the run.
 *
 * @returns {Promise<void>}
 */
export async function runIndexCommand() {
  try {
    initSchema();
  } catch (err) {
    console.error('❌ Failed to initialize schema:', err);
    process.exit(1);
  }

  const indexDir = Config.getIndexDir() || process.cwd();
  Config.setIndexDir(indexDir);
  log(`📂 Scanning files in: ${indexDir}`);

  const files = await fg('**/*.*', {
    cwd: indexDir,
    ignore: IGNORED_FOLDER_GLOBS,
    absolute: true,
  });

  const db = getDbForRepo();
  const release = await lockDb();
  const countByExt = {};
  let count = 0;
  // Prepared on first use and then reused for every file. Preparing lazily
  // inside the per-file try keeps a prepare failure on the same
  // "log and skip" path it had when the statement was rebuilt per file.
  let upsertFile = null;

  try {
    for (const file of files) {
      const classification = classifyFile(file);
      if (classification !== 'valid') {
        log(`⏭️ Skipping (${classification}): ${file}`);
        continue;
      }

      try {
        const type = detectFileType(file);
        // Store forward-slash paths regardless of the host platform.
        const normalizedPath = path.normalize(file).replace(/\\/g, '/');
        const filename = path.basename(normalizedPath);

        // --------------------------------------------------
        // Enqueue file for daemon processing
        // --------------------------------------------------
        if (upsertFile === null) {
          upsertFile = db.prepare(upsertFileTemplate);
        }
        upsertFile.run({
          path: normalizedPath,
          filename,
          summary: null,
          type,
          lastModified: null,
          indexedAt: null,
        });

        const ext = path.extname(file);
        countByExt[ext] = (countByExt[ext] || 0) + 1;
        count++;
      } catch (err) {
        log(`⚠️ Skipped in indexCmd ${file}: ${err instanceof Error ? err.message : err}`);
      }
    }
  } finally {
    // Always give the lock back, even if the loop throws.
    await release();
  }

  log('📊 Discovered files by extension:', JSON.stringify(countByExt, null, 2));
  log(`✅ Done. Enqueued ${count} files for indexing.`);

  // Kick the daemon — it now owns all processing
  startDaemon();
}

// --------------------------------------------------
// QUERY API (read-only, used by CLI / search)
// --------------------------------------------------

/**
 * Run `sqlTemplates.queryFilesTemplate` with the given query and limit.
 *
 * @param {string} safeQuery - Query text bound as the first statement
 *   parameter. The name implies the caller has already sanitised it; this
 *   function does no sanitising of its own.
 * @param {number} [limit=10] - Bound as the second statement parameter.
 * @returns {Array<object>} The rows returned by the statement.
 */
export function queryFiles(safeQuery, limit = 10) {
  const db = getDbForRepo();
  return db
    .prepare(sqlTemplates.queryFilesTemplate)
    .all(safeQuery, limit);
}

// --------------------------------------------------
// searchFiles with semantic fallback
// --------------------------------------------------

/**
 * Search indexed files, falling back to model-generated search terms when the
 * direct search returns nothing.
 *
 * The primary search runs `sqlTemplates.searchFilesTemplate` with the
 * sanitised query. If it yields no rows, `expandQueryWithModel` supplies
 * alternative terms; each is searched in turn and the rows are merged by
 * `id`, keeping the best-ranked (lowest `bm25Score`) row for each file.
 *
 * @param {string} query - Natural-language or keyword query.
 * @param {number} [topK=5] - Maximum number of results to return.
 * @returns {Promise<Array<object>>} Up to `topK` result objects (see
 *   `mapFtsResults`), or an empty array when nothing matched.
 */
export async function searchFiles(query, topK = 5) {
  const db = getDbForRepo();

  // -----------------------------
  // Primary FTS search
  // -----------------------------
  const safeQuery = sanitizeQueryForFts(query);
  // Prepared once and reused for the primary search and every fallback term.
  const searchStatement = db.prepare(sqlTemplates.searchFilesTemplate);
  const primaryResults = searchStatement.all(safeQuery, RELATED_FILES_LIMIT);

  if (primaryResults.length > 0) {
    return mapFtsResults(primaryResults, topK);
  }

  // -----------------------------
  // Fallback: model-assisted expansion
  // -----------------------------
  const expandedTerms = await expandQueryWithModel(query);
  if (expandedTerms.length === 0) return [];

  log(`🔁 [searchFiles] Fallback used for "${query}". Expanded terms: ${expandedTerms.join(", ")}`);

  const bestById = new Map();
  for (const term of expandedTerms) {
    const safeTerm = sanitizeQueryForFts(term);
    const rows = searchStatement.all(safeTerm, RELATED_FILES_LIMIT);

    for (const row of rows) {
      const existing = bestById.get(row.id);
      // Keep the best-ranked hit per file. The merged list is sorted by
      // bm25Score below, so holding on to the first-seen row would rank a
      // file by a worse score than one of the terms actually gave it.
      if (existing === undefined || row.bm25Score < existing.bm25Score) {
        bestById.set(row.id, row);
      }
    }
  }

  if (bestById.size === 0) return [];

  const merged = Array.from(bestById.values()).sort(
    (a, b) => a.bm25Score - b.bm25Score // lower = more relevant
  );

  return mapFtsResults(merged, topK);
}

// --------------------------------------------------
// Helpers
// --------------------------------------------------

/**
 * Take the first `topK` rows and copy only the fields exposed to callers.
 *
 * @param {Array<object>} rows - Result rows, already in the desired order.
 * @param {number} topK - Maximum number of rows to keep.
 * @returns {Array<object>} Objects with `id`, `path`, `filename`, `summary`,
 *   `type`, `lastModified` and `bm25Score`.
 */
function mapFtsResults(rows, topK) {
  return rows.slice(0, topK).map(r => ({
    id: r.id,
    path: r.path,
    filename: r.filename,
    summary: r.summary,
    type: r.type,
    lastModified: r.lastModified,
    bm25Score: r.bm25Score,
  }));
}

/**
 * Ask the model for literal search terms related to a natural-language query.
 *
 * The response from `generate` is passed through `cleanupModule.run`; if the
 * cleaned `data` is an array, its string entries are trimmed, blank entries
 * are dropped and duplicates are removed (first occurrence wins). Any error
 * is logged and results in an empty array, so the caller never has to handle
 * a rejection from this helper.
 *
 * @param {string} query - The user's original query.
 * @returns {Promise<string[]>} Unique, non-empty search terms; possibly empty.
 */
async function expandQueryWithModel(query) {
  const prompt = `
You are assisting a code search system.

Given a natural-language question about a codebase,
return a JSON array of 3–8 concrete search terms
that are likely to appear literally in source code.

Rules:
- Return ONLY a JSON array of strings
- No explanations
- Prefer filenames, function names, symbols, library names

Question:
"${query}"
`.trim();

  try {
    const response = await generate({
      content: prompt,
      query: "",
    });

    const cleaned = await cleanupModule.run({
      query,
      content: response.data,
    });

    if (!Array.isArray(cleaned.data)) return [];

    // Blank or repeated terms would only trigger empty or redundant searches.
    const uniqueTerms = new Set();
    for (const candidate of cleaned.data) {
      if (typeof candidate !== "string") continue;
      const term = candidate.trim();
      if (term !== "") uniqueTerms.add(term);
    }

    return Array.from(uniqueTerms);
  } catch (err) {
    log(`⚠️ [searchFiles] Failed to expand query "${query}": ${String(err)}`);
    return [];
  }
}