UNPKG

scai

Version:

> **AI-powered CLI for local code analysis, commit message suggestions, and natural-language queries.** 100% local, private, GDPR-friendly, made in Denmark/EU with ❤️.

177 lines (176 loc) 6.59 kB
// src/pipeline/modules/gatherInfoModule.ts
import { getDbForRepo } from "../../db/client.js";
import chalk from "chalk";
import { sanitizeQueryForFts } from "../../utils/sanitizeQuery.js";
import { logInputOutput } from "../../utils/promptLogHelper.js"; // ✅ import logger

/** Column list for `files` rows on a current schema. */
const FILE_COLUMNS = `
    f.id, f.path, f.type, f.summary, f.embedding, f.last_modified, f.indexed_at,
    COALESCE(f.functions_extracted, 0) AS functions_extracted,
    f.functions_extracted_at, f.processing_status`;

/** Column list for legacy DBs that lack the `functions_extracted*` columns. */
const LEGACY_FILE_COLUMNS = `
    f.id, f.path, f.type, f.summary, f.embedding, f.last_modified, f.indexed_at,
    f.processing_status`;

/**
 * Escape LIKE metacharacters so the user's text is matched literally.
 *
 * Every LIKE in this module is written with `ESCAPE '\'`, so the backslash
 * itself must be escaped together with `%` and `_`. Otherwise a query ending
 * in `\` would turn the trailing `%` wildcard into a literal percent sign.
 */
function sanitizeForLike(input) {
    return input.replace(/[\\%_]/g, "\\$&");
}

/**
 * Return a copy of a module output whose `data.files` entries have no
 * `embedding` property. The argument is left untouched; outputs without
 * `data.files` are returned as-is.
 */
function stripEmbeddings(output) {
    const files = output?.data?.files;
    if (!files) {
        return output;
    }
    return {
        ...output,
        data: {
            ...output.data,
            files: files.map((file) => {
                const { embedding, ...rest } = file;
                return rest;
            }),
        },
    };
}

/** True when `err` reports that a referenced column does not exist. */
function isMissingColumnError(err) {
    const message = err instanceof Error ? err.message : String(err);
    // NOTE(review): relies on SQLite's "no such column: ..." wording — confirm
    // against the driver returned by getDbForRepo().
    return /no such column/i.test(message);
}

/**
 * Select rows from `files f`, retrying with the legacy column list when the
 * current one references a column the database does not have.
 *
 * @param db           handle returned by getDbForRepo()
 * @param tailSql      everything after `FROM files f` (joins, WHERE, ORDER BY, LIMIT)
 * @param params       positional bind parameters for `tailSql`
 * @param warnOnLegacy print the legacy-schema warning before retrying
 * @throws any error that is not a missing-column error, and any error raised
 *         by the legacy retry itself
 */
function selectFiles(db, tailSql, params, warnOnLegacy) {
    const runWith = (columns) => db.prepare(`SELECT ${columns} FROM files f ${tailSql};`).all(...params);
    try {
        return runWith(FILE_COLUMNS);
    }
    catch (err) {
        // Only a schema mismatch justifies the fallback; anything else
        // (e.g. a malformed FTS expression) would fail again the same way.
        if (!isMissingColumnError(err)) {
            throw err;
        }
        if (warnOnLegacy) {
            console.warn(chalk.yellow("⚠️ 'functions_extracted' column missing in files table, running fallback query..."));
        }
        return runWith(LEGACY_FILE_COLUMNS);
    }
}

/** Build, log and return an output that carries a message and no data rows. */
function emptyResult(content) {
    const emptyOutput = {
        content,
        data: { files: [], functions: [], classes: [], summaries: [], tags: [] },
    };
    logInputOutput("gatherInfo", "output", emptyOutput); // ✅
    return emptyOutput;
}

/**
 * Pipeline module that gathers context for `input.metadata.query`:
 * FTS-matched files, functions and classes whose name or content contains the
 * query, matching tag names, up to 5 matching summaries, and common project
 * config files (package.json, tsconfig.json, .eslintrc, .env, README.md).
 *
 * `input.metadata.maxFiles` (default 10) caps the file, function and class
 * queries individually. The returned `data.files` entries never carry the
 * `embedding` column; the same embedding-free object is what gets logged.
 */
export const gatherInfoModule = {
    name: "gatherInfo",
    description: "Collects relevant code summaries, functions, classes, and related graph info for the current query.",
    run: async function (input) {
        const db = getDbForRepo();
        const query = input.metadata?.query?.trim() ?? "";
        const maxFiles = input.metadata?.maxFiles ?? 10;
        if (!query) {
            return emptyResult("⚠️ No query provided to gatherInfoModule.");
        }

        const ftsQuery = sanitizeQueryForFts(query);
        const likeQuery = `%${sanitizeForLike(query)}%`;

        // The FTS expression is bound as a parameter rather than spliced into
        // the SQL text, so quotes in it cannot terminate the string literal.
        // NOTE(review): assumes sanitizeQueryForFts() returns a plain FTS
        // expression and does not SQL-escape quotes itself — confirm.
        // 🩹 Handles legacy DBs that might not have `functions_extracted`.
        const files = selectFiles(db, `
            JOIN files_fts fts ON fts.rowid = f.id
            WHERE files_fts MATCH ?
            ORDER BY f.path ASC
            LIMIT ?`, [ftsQuery, maxFiles], true);

        const functions = db
            .prepare(`
                SELECT fn.id, fn.name, fn.start_line, fn.end_line,
                       substr(fn.content, 1, 400) AS content,
                       f.path AS file_path
                FROM functions fn
                JOIN files f ON f.id = fn.file_id
                WHERE fn.name LIKE ? ESCAPE '\\' OR fn.content LIKE ? ESCAPE '\\'
                ORDER BY f.path ASC
                LIMIT ?;
            `)
            .all(likeQuery, likeQuery, maxFiles);

        const classes = db
            .prepare(`
                SELECT c.name, f.path AS file_path,
                       substr(c.content, 1, 400) AS snippet
                FROM graph_classes c
                JOIN files f ON f.id = c.file_id
                WHERE c.name LIKE ? ESCAPE '\\' OR c.content LIKE ? ESCAPE '\\'
                ORDER BY f.path ASC
                LIMIT ?;
            `)
            .all(likeQuery, likeQuery, maxFiles);

        const tagRows = db
            .prepare(`
                SELECT DISTINCT gtm.name
                FROM graph_tags_master gtm
                WHERE gtm.name LIKE ? ESCAPE '\\'
                ORDER BY gtm.name ASC;
            `)
            .all(likeQuery);
        const tags = tagRows.map((t) => t.name);

        // Project-level summaries sort ahead of all others.
        const summaries = db
            .prepare(`
                SELECT path, type, summary
                FROM summaries
                WHERE summary LIKE ? ESCAPE '\\'
                ORDER BY CASE WHEN type='project' THEN 0 ELSE 1 END, path ASC
                LIMIT 5;
            `)
            .all(likeQuery);

        // --- Gather common project config files (no warning on legacy fallback)
        const configFiles = selectFiles(db, `
            WHERE path LIKE '%package.json%'
               OR path LIKE '%tsconfig.json%'
               OR path LIKE '%.eslintrc%'
               OR path LIKE '%.env%'
               OR path LIKE '%README.md%'
            ORDER BY path ASC`, [], false);

        // Merge & deduplicate by path; FTS hits keep priority and order.
        const matchedPaths = new Set(files.map((f) => f.path));
        const uniqueFiles = [
            ...files,
            ...configFiles.filter((c) => !matchedPaths.has(c.path)),
        ];

        if (!uniqueFiles.length && !functions.length && !classes.length && !summaries.length && !tags.length) {
            return emptyResult(`⚠️ No relevant information found for query: "${query}"`);
        }

        // Embeddings are dropped once, up front, so the logged object and the
        // returned object are the same embedding-free value.
        const output = stripEmbeddings({
            content: `Gathered ${uniqueFiles.length} files, ${functions.length} functions, ${classes.length} classes, and ${summaries.length} summaries.`,
            data: {
                query, // ✅ ensure query passes forward
                files: uniqueFiles,
                functions,
                classes,
                summaries,
                tags,
            },
        });
        // ✅ Log sanitized module output (no embeddings)
        logInputOutput("gatherInfo", "output", output);
        return output;
    },
};