scai
Version:
> **AI-powered CLI for local code analysis, commit message suggestions, and natural-language queries.** 100% local, private, GDPR-friendly, made in Denmark/EU with ❤️.
177 lines (176 loc) • 6.59 kB
JavaScript
// src/pipeline/modules/gatherInfoModule.ts
import { getDbForRepo } from "../../db/client.js";
import chalk from "chalk";
import { sanitizeQueryForFts } from "../../utils/sanitizeQuery.js";
import { logInputOutput } from "../../utils/promptLogHelper.js"; // ✅ import logger
/**
 * Escape a user-supplied string so it can be embedded in a SQL LIKE pattern
 * that is evaluated with `ESCAPE '\'`.
 *
 * The LIKE wildcards `%` and `_` are prefixed with a backslash, and so is the
 * backslash itself: because it is the escape character, an unescaped backslash
 * in the input would otherwise act as an escape prefix for whatever character
 * follows it instead of being matched literally.
 *
 * @param {string} input - Raw text to be matched literally.
 * @returns {string} The text with `\`, `%` and `_` each preceded by a backslash.
 */
function sanitizeForLike(input) {
  return input.replace(/[\\%_]/g, "\\$&");
}
/**
 * Produce a copy of a module output with the `embedding` property removed
 * from every entry of `data.files`, intended for logging.
 *
 * The input object is not modified: when `data.files` is present, a new
 * top-level object, a new `data` object and a new `files` array are returned
 * (all other properties are shallow-copied). When `data.files` is absent or
 * falsy, the input is returned unchanged.
 *
 * @param {object} [output] - Module output; may be null/undefined.
 * @returns {object} The sanitized copy, or `output` itself if there is nothing to strip.
 */
function stripEmbeddings(output) {
  const files = output?.data?.files;
  if (!files) {
    return output;
  }
  return {
    ...output,
    data: {
      ...output.data,
      // Drop only `embedding`; every other file field is kept as-is.
      files: files.map(({ embedding, ...rest }) => rest),
    },
  };
}
/**
 * Select rows from the `files` table, tolerating legacy databases.
 *
 * The preferred statement also selects `functions_extracted` and
 * `functions_extracted_at`. If that statement fails with an error whose
 * message mentions `functions_extracted`, the same query is re-run without
 * those columns. Any other error is rethrown unchanged.
 *
 * @param {object} db - Database handle exposing `prepare(sql).all(...params)`.
 * @param {string} tail - SQL that follows `FROM files f` (JOIN/WHERE/ORDER/LIMIT).
 *   Must be a trusted, static string; user data goes through `params`.
 * @param {Array} params - Values bound to the `?` placeholders in `tail`.
 * @param {Function} [onLegacyFallback] - Called once right before the fallback query runs.
 * @returns {Array<object>} The selected file rows.
 */
function selectFileRows(db, tail, params, onLegacyFallback) {
  const currentSql = `
    SELECT f.id, f.path, f.type, f.summary, f.embedding,
           f.last_modified, f.indexed_at,
           COALESCE(f.functions_extracted, 0) AS functions_extracted,
           f.functions_extracted_at, f.processing_status
    FROM files f
    ${tail}
  `;
  const legacySql = `
    SELECT f.id, f.path, f.type, f.summary, f.embedding,
           f.last_modified, f.indexed_at, f.processing_status
    FROM files f
    ${tail}
  `;
  try {
    return db.prepare(currentSql).all(...params);
  } catch (err) {
    // Only the missing-column case is recoverable; the legacy statement would
    // fail the same way for anything else, so surface the original error.
    const message = String(err?.message ?? err);
    if (!message.includes("functions_extracted")) {
      throw err;
    }
    onLegacyFallback?.();
    return db.prepare(legacySql).all(...params);
  }
}

export const gatherInfoModule = {
  name: "gatherInfo",
  description: "Collects relevant code summaries, functions, classes, and related graph info for the current query.",
  /**
   * Gather files, functions, classes, tags and summaries related to the query.
   *
   * Reads `input.metadata.query` (trimmed) and `input.metadata.maxFiles`
   * (default 10). Files are found through the `files_fts` full-text index;
   * functions, classes, tags and summaries through LIKE matching. A fixed set
   * of project config files (package.json, tsconfig.json, .eslintrc, .env,
   * README.md) is always appended to the file list, de-duplicated by path.
   * Every result is passed through `stripEmbeddings` and logged with
   * `logInputOutput` before being returned.
   *
   * @param {object} input - Pipeline input; only `metadata.query` and `metadata.maxFiles` are read.
   * @returns {Promise<{content: string, data: object}>} A summary line plus the
   *   gathered rows. `data.query` is only set when something was found.
   */
  run: async function (input) {
    const db = getDbForRepo();
    const query = input.metadata?.query?.trim() ?? "";
    const maxFiles = input.metadata?.maxFiles ?? 10;

    // Shared shape for both "nothing to do" exits; logs, then returns it.
    const finishEmpty = (content) => {
      const emptyOutput = {
        content,
        data: { files: [], functions: [], classes: [], summaries: [], tags: [] },
      };
      logInputOutput("gatherInfo", "output", stripEmbeddings(emptyOutput));
      return emptyOutput;
    };

    if (!query) {
      return finishEmpty("⚠️ No query provided to gatherInfoModule.");
    }

    const sanitizedFts = sanitizeQueryForFts(query);
    const likeQuery = `%${sanitizeForLike(query)}%`;

    // Full-text file search. The FTS expression is bound as a parameter
    // rather than spliced into the SQL text, so quotes in it cannot break out
    // of the statement.
    // NOTE(review): assumes sanitizeQueryForFts returns a plain FTS expression
    // without SQL-literal quote escaping — confirm against that helper.
    const files = selectFileRows(
      db,
      `JOIN files_fts fts ON fts.rowid = f.id
       WHERE files_fts MATCH ?
       ORDER BY f.path ASC
       LIMIT ?;`,
      [sanitizedFts, maxFiles],
      () => {
        console.warn(chalk.yellow("⚠️ 'functions_extracted' column missing in files table, running fallback query..."));
      },
    );

    const functions = db
      .prepare(`
        SELECT fn.id, fn.name, fn.start_line, fn.end_line,
               substr(fn.content, 1, 400) AS content,
               f.path AS file_path
        FROM functions fn
        JOIN files f ON f.id = fn.file_id
        WHERE fn.name LIKE ? ESCAPE '\\'
           OR fn.content LIKE ? ESCAPE '\\'
        ORDER BY f.path ASC
        LIMIT ?;
      `)
      .all(likeQuery, likeQuery, maxFiles);

    const classes = db
      .prepare(`
        SELECT c.name, f.path AS file_path, substr(c.content, 1, 400) AS snippet
        FROM graph_classes c
        JOIN files f ON f.id = c.file_id
        WHERE c.name LIKE ? ESCAPE '\\'
           OR c.content LIKE ? ESCAPE '\\'
        ORDER BY f.path ASC
        LIMIT ?;
      `)
      .all(likeQuery, likeQuery, maxFiles);

    const tagRows = db
      .prepare(`
        SELECT DISTINCT gtm.name
        FROM graph_tags_master gtm
        WHERE gtm.name LIKE ? ESCAPE '\\'
        ORDER BY gtm.name ASC;
      `)
      .all(likeQuery);
    const tags = tagRows.map((t) => t.name);

    // Project-level summaries sort ahead of all others; capped at 5 rows.
    const summaries = db
      .prepare(`
        SELECT path, type, summary
        FROM summaries
        WHERE summary LIKE ? ESCAPE '\\'
        ORDER BY CASE WHEN type='project' THEN 0 ELSE 1 END, path ASC
        LIMIT 5;
      `)
      .all(likeQuery);

    // Common project config files, included regardless of the query text.
    const configFiles = selectFileRows(
      db,
      `WHERE path LIKE '%package.json%'
          OR path LIKE '%tsconfig.json%'
          OR path LIKE '%.eslintrc%'
          OR path LIKE '%.env%'
          OR path LIKE '%README.md%'
       ORDER BY path ASC;`,
      [],
    );

    // Merge, keeping search hits first and skipping config files already present.
    const seenPaths = new Set(files.map((f) => f.path));
    const uniqueFiles = [
      ...files,
      ...configFiles.filter((c) => !seenPaths.has(c.path)),
    ];

    if (!uniqueFiles.length &&
      !functions.length &&
      !classes.length &&
      !summaries.length &&
      !tags.length) {
      return finishEmpty(`⚠️ No relevant information found for query: "${query}"`);
    }

    const output = {
      content: `Gathered ${uniqueFiles.length} files, ${functions.length} functions, ${classes.length} classes, and ${summaries.length} summaries.`,
      data: {
        query, // forwarded so later pipeline steps can see the original query
        files: uniqueFiles,
        functions,
        classes,
        summaries,
        tags,
      },
    };
    // Log the module output with embeddings stripped.
    logInputOutput("gatherInfo", "output", stripEmbeddings(output));
    return output;
  },
};