scai
Version:
> **A local-first AI CLI for understanding, querying, and iterating on large codebases.** > **100% local • No token costs • No cloud • No prompt injection • Private by design**
180 lines (172 loc) • 7.37 kB
JavaScript
// File: src/modules/preFileSearchCheckStep.ts
import { generate } from "../lib/generate.js";
import { logInputOutput } from "../utils/promptLogHelper.js";
import { cleanupModule } from "../pipeline/modules/cleanupModule.js";
import path from "path";
/**
 * Pre file-search check step.
 *
 * 1. Deterministically seeds `context.analysis.focus` from file references found
 *    in `analysis.intent.normalizedQuery` and `analysis.planSuggestion.text`,
 *    resolving them against `context.initContext.relatedFiles`.
 * 2. Summarises `analysis.verify.byFile` evidence into a per-file digest.
 * 3. Sends both to the model (`generate` + `cleanupModule.run`) and merges the
 *    reply into `analysis.focus` / `analysis.understanding`.
 *
 * The model round-trip is best-effort: any failure is logged with
 * `console.warn` and the deterministic result from step 1 is kept.
 *
 * @param {object} context - Pipeline context; mutated in place. `analysis` and
 *   `analysis.focus` are created when missing.
 * @returns {Promise<void>} Resolves once `context` has been updated.
 */
export async function fileCheckStep(context) {
  // Make sure the containers this step writes into exist.
  if (context.analysis == null) {
    context.analysis = {};
  }
  if (context.analysis.focus == null) {
    context.analysis.focus = { selectedFiles: [], candidateFiles: [], rationale: "" };
  }
  const { analysis } = context;
  const intent = analysis.intent;
  const planSuggestion = analysis.planSuggestion?.text ?? "";

  // Step 1: known files come from initContext only (de-duplicated, order kept).
  const knownFiles = [...new Set(context.initContext?.relatedFiles ?? [])];

  // Step 2 + 3: resolved references become selected, unresolved ones candidates.
  const selectedFiles = [];
  const candidateFiles = [];
  for (const reference of extractFilesFromAnalysis(analysis)) {
    const resolved = resolveKnownFile(reference, knownFiles);
    if (resolved !== undefined) {
      selectedFiles.push(resolved);
    } else {
      candidateFiles.push(reference);
    }
  }

  const mergedSelected = [
    ...new Set([...(analysis.focus.selectedFiles ?? []), ...selectedFiles]),
  ];
  const selectedLookup = new Set(mergedSelected);
  // A file that is already selected must not also be listed as a candidate.
  const mergedCandidate = [
    ...new Set([...(analysis.focus.candidateFiles ?? []), ...candidateFiles]),
  ].filter((file) => !selectedLookup.has(file));

  analysis.focus.selectedFiles = mergedSelected;
  analysis.focus.candidateFiles = mergedCandidate;
  analysis.focus.rationale =
    `Pre-check: +${selectedFiles.length} selected / +${candidateFiles.length} candidate(s); totals ${mergedSelected.length} selected, ${mergedCandidate.length} candidate(s).`;

  // ----------------- Deterministic Evidence Extraction -----------------
  const evidenceByFile = buildEvidenceByFile(analysis.verify?.byFile ?? {});

  // ----------------- Prompt -----------------
  // NOTE: the template lines are intentionally flush-left; indentation inside
  // the literal would become part of the prompt text.
  const prompt = `
You are an AI meta-agent assisting with context verification.
User intent:
${JSON.stringify(intent ?? {}, null, 2)}
Plan suggestion:
${planSuggestion}
Known files in context:
${JSON.stringify(knownFiles, null, 2)}
Deterministic evidence per file:
${JSON.stringify(evidenceByFile, null, 2)}
Evidence rules:
- Evidence includes a type ("filename", "symbol", "sentence", "keyword-cluster").
- Each evidence item includes a confidence score between 0 and 1.
- Each file includes an aggregated fileConfidence (average of its evidence confidence).
- Higher confidence and stronger evidence types (e.g. filename, symbol definition) indicate stronger relevance.
- Evidence alone is NOT sufficient — it must align with user intent.
Task:
1. Evaluate file relevance using BOTH fileConfidence and semantic alignment with intent.
2. Prefer files with higher fileConfidence when multiple candidates exist.
3. Only promote files to selectedFiles if they clearly satisfy the user intent.
4. List unresolved candidate files if any.
5. Suggest assumptions, constraints, or risks that may affect execution.
6. Return STRICT JSON with shape:
{
"selectedFiles": string[],
"candidateFiles": string[],
"rationale": string,
"understanding": {
"assumptions"?: string[],
"constraints"?: string[],
"risks"?: string[]
}
}
`.trim();

  try {
    const userQuery = context.initContext?.userQuery ?? '';
    const ai = await generate({ query: userQuery, content: prompt });

    let cleaned;
    try {
      cleaned = await cleanupModule.run({ query: userQuery, content: ai.data });
    } catch (cleanupErr) {
      console.warn("[preFileSearchCheckStep] cleanupModule failed, using raw AI output", cleanupErr);
      cleaned = { data: ai.data, content: ai.data };
    }

    const parsed = parseCleanedOutput(cleaned);
    mergeAiResult(context.analysis, parsed);
    logInputOutput("preFileSearchCheckStep", "output", parsed);
  } catch (err) {
    // Best-effort: the deterministic pre-check result stays in place.
    console.warn("[preFileSearchCheckStep] AI pre-check failed, using defaults", err);
  }
}

/**
 * Simple regex-based extractor for `.js` / `.ts` file references.
 *
 * @param {object} analysis - Reads `intent.normalizedQuery` and `planSuggestion.text`.
 * @returns {string[]} Unique references in order of first appearance (possibly empty).
 */
function extractFilesFromAnalysis(analysis) {
  const sources = [
    analysis?.intent?.normalizedQuery ?? "",
    analysis?.planSuggestion?.text ?? "",
  ].join("\n");
  const regex = /\b([\w\-\./]+\.js|[\w\-\./]+\.ts)\b/g;
  const matches = sources.match(regex);
  return matches ? Array.from(new Set(matches.map((m) => m.trim()))) : [];
}

/**
 * Finds the known file a textual reference points at.
 *
 * A bare file name is matched against each known file's basename. A reference
 * that carries directory segments (e.g. `src/utils/foo.ts`) can never equal a
 * basename, so it is matched as a trailing path instead.
 *
 * @param {string} reference - File reference extracted from text.
 * @param {string[]} knownFiles - Known file paths.
 * @returns {string|undefined} The first matching known path, if any.
 */
function resolveKnownFile(reference, knownFiles) {
  const wanted = reference.replace(/^(?:\.\/)+/, "");
  const hasDirectory = wanted.includes("/");
  return knownFiles.find((known) => {
    if (typeof known !== "string") {
      return false;
    }
    if (!hasDirectory) {
      return path.basename(known) === reference;
    }
    // Normalise Windows separators so suffix matching is separator-agnostic.
    const normalized = known.replace(/\\/g, "/");
    return normalized === wanted || normalized.endsWith(`/${wanted}`);
  });
}

/**
 * Condenses `verify.byFile` into `{ fileConfidence, evidenceCount, evidence }`
 * per file. Files without usable evidence are omitted.
 *
 * @param {object} verifyByFile - Map of file path to verification record.
 * @returns {object} Map of file path to evidence digest.
 */
function buildEvidenceByFile(verifyByFile) {
  const evidenceByFile = {};
  for (const [filePath, verify] of Object.entries(verifyByFile)) {
    // Malformed (non-array) evidence is skipped rather than aborting the step.
    const rawItems = Array.isArray(verify?.evidence) ? verify.evidence : [];
    const evidence = rawItems
      .filter((item) => item !== null && typeof item === "object")
      .map(({ type, claim, excerpt, span, confidence }) => ({
        type,
        claim,
        excerpt,
        span,
        confidence,
      }));
    if (evidence.length === 0) {
      continue;
    }

    // Prefer an explicit file-level score; otherwise average the item scores,
    // counting missing or non-numeric confidences as 0.
    const total = evidence.reduce(
      (sum, item) => sum + (Number.isFinite(item.confidence) ? item.confidence : 0),
      0,
    );
    const fileConfidence = Number.isFinite(verify.fileConfidence)
      ? verify.fileConfidence
      : total / evidence.length;

    evidenceByFile[filePath] = {
      fileConfidence: Number(fileConfidence.toFixed(3)),
      evidenceCount: evidence.length,
      evidence,
    };
  }
  return evidenceByFile;
}

/**
 * Extracts the structured reply from the cleanup result: uses `cleaned.data`
 * when it is an object, otherwise JSON-parses `cleaned.content`.
 *
 * @param {object} cleaned - Result of `cleanupModule.run` (or the raw fallback).
 * @returns {object} The parsed object, or `{}` when nothing usable was found.
 */
function parseCleanedOutput(cleaned) {
  try {
    let parsed = {};
    if (typeof cleaned.data === "object" && cleaned.data !== null) {
      parsed = cleaned.data;
    } else if (typeof cleaned.content === "string") {
      parsed = JSON.parse(cleaned.content);
    }
    // JSON.parse can legitimately yield null, a primitive or an array.
    if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
      console.warn("[preFileSearchCheckStep] Cleanup output is not a JSON object, using defaults");
      return {};
    }
    return parsed;
  } catch (parseErr) {
    console.warn("[preFileSearchCheckStep] Failed to parse cleanup output, using defaults", parseErr);
    return {};
  }
}

/**
 * Returns `existing` followed by every non-empty string of `incoming` that is
 * not already present (also de-duplicating within `incoming`).
 *
 * @param {Array} existing - Current entries, kept as-is.
 * @param {Array} incoming - Untrusted entries to append.
 * @returns {Array} New merged array.
 */
function appendNewStrings(existing, incoming) {
  const seen = new Set(existing);
  const merged = [...existing];
  for (const entry of incoming) {
    if (typeof entry === "string" && entry !== "" && !seen.has(entry)) {
      seen.add(entry);
      merged.push(entry);
    }
  }
  return merged;
}

/**
 * Merges the model's reply into `analysis.focus` and `analysis.understanding`.
 * Only well-typed fields are applied; selected and candidate lists are kept
 * disjoint, mirroring the deterministic pre-check.
 *
 * @param {object} analysis - `context.analysis`; mutated in place.
 * @param {object} parsed - Parsed model reply.
 * @returns {void}
 */
function mergeAiResult(analysis, parsed) {
  const { focus } = analysis;

  if (Array.isArray(parsed.selectedFiles)) {
    focus.selectedFiles = appendNewStrings(focus.selectedFiles, parsed.selectedFiles);
  }
  if (Array.isArray(parsed.candidateFiles)) {
    focus.candidateFiles = appendNewStrings(focus.candidateFiles, parsed.candidateFiles);
  }
  // A file promoted to selected by the model must stop being a candidate.
  const selectedLookup = new Set(focus.selectedFiles);
  focus.candidateFiles = focus.candidateFiles.filter((file) => !selectedLookup.has(file));

  if (typeof parsed.rationale === "string") {
    focus.rationale = `${focus.rationale ?? ""}\n[Post-check] ${parsed.rationale}`;
  }
  if (
    parsed.understanding &&
    typeof parsed.understanding === "object" &&
    !Array.isArray(parsed.understanding)
  ) {
    analysis.understanding = {
      ...analysis.understanding,
      ...parsed.understanding,
    };
  }
}