scai
Version:
> **AI-powered CLI for local code analysis, commit message suggestions, and natural-language queries.** > **100% local • No token cost • Private by design • GDPR-friendly** — made in Denmark/EU with ❤️.
259 lines (248 loc) • 10.2 kB
JavaScript
// File: src/modules/semanticAnalysisStep.ts
import { logInputOutput } from "../utils/promptLogHelper.js";
import { generate } from "../lib/generate.js";
import { cleanupModule } from "../pipeline/modules/cleanupModule.js";
const MAX_CODE_CHARS = 6000; // conservative prompt-safe limit
export const semanticAnalysisStep = {
    name: "semanticAnalysis",
    description: "Query-centric semantic analysis: per-file relevance and optional combined insights.",
    groups: ["analysis"],
    /**
     * Run query-centric semantic analysis over the context's working files.
     *
     * Per-file results are cached on `context.analysis.fileAnalysis` (keyed by
     * file path); a cross-file combined analysis is only produced when more
     * than two files were judged relevant.
     *
     * @param input - Pipeline step input; must carry `input.context` with
     *   `workingFiles` and (optionally) `analysis.focus.relevantFiles`.
     * @returns `{ query, data, context }` with the mutated context.
     * @throws Error when no context is provided.
     */
    run: async (input) => {
        const context = input.context;
        if (!context)
            throw new Error("[semanticAnalysisStep] No context provided");
        const workingFiles = context.workingFiles ?? [];
        if (!workingFiles.length) {
            const notes = "[semanticAnalysisStep] No working files loaded; skipping semantic analysis";
            logInputOutput("semanticAnalysisStep", "output", { notes });
            return { query: input.query, data: { notes }, context };
        }
        // Lazily initialize the analysis containers (replaces the compiled
        // `var _a, _b` temporaries with logical nullish assignment).
        context.analysis ??= {};
        context.analysis.fileAnalysis ??= {};
        context.analysis.combinedAnalysis ??= {};
        const focusFiles = new Set(context.analysis.focus?.relevantFiles ?? []);
        // Empty focus set means "no pre-selection" → analyze every working file.
        const filesToAnalyze = workingFiles.filter((f) => focusFiles.size === 0 || focusFiles.has(f.path));
        // ----------------------------
        // 1️⃣ Per-file semantic analysis
        // ----------------------------
        for (const file of filesToAnalyze) {
            const filePath = file.path;
            // Preserve existing caching behavior
            if (context.analysis.fileAnalysis[filePath])
                continue;
            // BUG FIX: pass `context` as the third argument. Previously it was
            // omitted, so analyzeFile's focus/understanding snippet logic never
            // received any data and the context section of the prompt was empty.
            const fileAnalysis = await analyzeFile(file, input.query, context);
            context.analysis.fileAnalysis[filePath] = fileAnalysis;
            logInputOutput("semanticAnalysisStep - per-file", "output", {
                file: filePath,
                analysis: fileAnalysis,
            });
        }
        // ----------------------------
        // 2️⃣ Cross-file combined analysis
        // Only use files marked as relevant
        // Skip if ≤ 2 relevant files
        // ----------------------------
        const relevantFileAnalysis = Object.fromEntries(Object.entries(context.analysis.fileAnalysis)
            .filter(([_, fa]) => fa.intent && fa.intent !== "irrelevant"));
        let combinedAnalysis = {
            sharedPatterns: [],
            architectureSummary: "[skipped]",
            hotspots: [],
        };
        if (Object.keys(relevantFileAnalysis).length > 2) {
            combinedAnalysis = await analyzeCombined(relevantFileAnalysis, input.query);
            context.analysis.combinedAnalysis = combinedAnalysis;
            logInputOutput("semanticAnalysisStep - combined", "output", combinedAnalysis);
        }
        else {
            // Too few relevant files for cross-file insights; record the skip marker.
            context.analysis.combinedAnalysis = combinedAnalysis;
        }
        return {
            query: input.query,
            data: { notes: "Query-centric semantic analysis completed" },
            context,
        };
    },
};
/**
 * Analyze a single working file for relevance to the user query.
 *
 * Builds a query-centric prompt (optionally enriched with focus rationale and
 * problem-understanding snippets from `context.analysis`), sends it through
 * `generate` → `cleanupModule`, and normalizes the model's JSON into a stable
 * shape. Any failure (network, non-JSON output, thrown error) degrades to a
 * safe "irrelevant" result rather than aborting the pipeline.
 *
 * @param file - Working file with `path` and `code`.
 * @param query - The user's natural-language query.
 * @param context - Optional pipeline context carrying `analysis.focus` and
 *   `analysis.understanding` used to guide the relevance judgment.
 * @returns Normalized analysis: `{ intent, relevance, role?, action, proposedChanges, risks }`.
 */
async function analyzeFile(file, query, context) {
    const slicedCode = sliceCodeForAnalysis(file.code);
    // Gather focus/context info from preFileSearchCheckStep and problem understanding
    const focus = context?.analysis?.focus;
    const understanding = context?.analysis?.understanding;
    const rationaleSnippet = focus?.rationale ? `Rationale: ${focus.rationale}` : "";
    const assumptionsSnippet = understanding?.assumptions
        ? `Assumptions: ${understanding.assumptions.join("; ")}`
        : "";
    const constraintsSnippet = understanding?.constraints
        ? `Constraints: ${understanding.constraints.join("; ")}`
        : "";
    const risksSnippet = understanding?.risks
        ? `Known risks: ${understanding.risks.join("; ")}`
        : "";
    const contextSnippet = [rationaleSnippet, assumptionsSnippet, constraintsSnippet, risksSnippet]
        .filter(Boolean)
        .join("\n");
    const prompt = `
You are analyzing a single file in the context of a user query.
User query:
"${query}"
Context from pre-file selection and problem understanding (use this to guide your relevance judgment):
${contextSnippet}
Important:
- You may be seeing a PARTIAL view of the file due to size limits.
- Decide relevance based on the available content, file path, and naming.
Your task:
1. Decide whether this file is relevant to answering the query.
2. If irrelevant:
- Set intent to "irrelevant"
- Briefly explain why in the relevance field
- Explicitly state that the file should NOT be modified
3. If relevant:
- Set intent to "relevant"
- Assign a role: "primary", "supporting", or "contextual"
- Explain in 1–2 sentences how this file contributes to the query
- Explicitly state whether this file should be modified to satisfy the query
- Describe what changes (if any) should be made in this file
(describe WHAT and WHY, not HOW)
Return STRICT JSON with this shape:
{
"intent": "relevant" | "irrelevant",
"relevance": string,
"role"?: "primary" | "supporting" | "contextual",
"action"?: {
"shouldModify": boolean
},
"proposedChanges"?: {
"summary": string,
"scope": "none" | "minor" | "moderate" | "major",
"targets"?: string[],
"rationale"?: string
},
"risks"?: string[]
}
File path: ${file.path}
Code excerpt:
${slicedCode ?? "[no code]"}
`.trim();
    try {
        const ai = await generate({ query: file.path, content: prompt });
        const cleaned = await cleanupModule.run({ query, content: ai.data });
        let data;
        if (typeof cleaned.data === "object" && cleaned.data) {
            data = cleaned.data;
        }
        else {
            try {
                data = JSON.parse(String(cleaned.content ?? "{}"));
            }
            catch {
                // Model returned free text instead of JSON; treat as irrelevant.
                console.warn(` - [semanticAnalysisStep] Non-JSON output for ${file.path}, defaulting to irrelevant`);
                data = {};
            }
        }
        // Only the two schema values are accepted; anything else is irrelevant.
        const intent = data.intent === "relevant" || data.intent === "irrelevant"
            ? data.intent
            : "irrelevant";
        // The valid scope values declared in the prompt's JSON schema.
        const validScopes = ["none", "minor", "moderate", "major"];
        // BUG FIX: the previous chain ended in `data.proposedChanges?.scope && ...`,
        // which yields `undefined` (not `false`) when scope is missing — breaking
        // the declared `shouldModify: boolean` contract. The typeof check keeps
        // the result a strict boolean.
        const shouldModify = intent === "relevant" &&
            data.action?.shouldModify === true &&
            typeof data.proposedChanges?.scope === "string" &&
            data.proposedChanges.scope !== "none";
        return {
            intent,
            relevance: typeof data.relevance === "string" && data.relevance.trim()
                ? data.relevance
                : intent === "relevant"
                    ? "This file appears relevant to the query."
                    : "This file does not appear relevant to answering the query.",
            role: intent === "relevant" &&
                (data.role === "primary" ||
                    data.role === "supporting" ||
                    data.role === "contextual")
                ? data.role
                : undefined,
            action: { shouldModify },
            proposedChanges: intent === "relevant" && data.proposedChanges
                ? {
                    summary: String(data.proposedChanges.summary ?? ""),
                    // BUG FIX: previously any model-supplied string leaked through;
                    // unknown scope values now fall back to "none" per the schema.
                    scope: validScopes.includes(data.proposedChanges.scope)
                        ? data.proposedChanges.scope
                        : "none",
                    targets: Array.isArray(data.proposedChanges.targets)
                        ? data.proposedChanges.targets
                        : undefined,
                    rationale: typeof data.proposedChanges.rationale === "string"
                        ? data.proposedChanges.rationale
                        : undefined,
                }
                : { summary: "No changes are required in this file for the given query.", scope: "none" },
            risks: Array.isArray(data.risks) ? data.risks : [],
        };
    }
    catch (err) {
        // Never let a single file's failure break the whole analysis step.
        console.warn(` - [semanticAnalysisStep] Failed to analyze file ${file.path}:`, err);
        return {
            intent: "irrelevant",
            relevance: "This file could not be analyzed due to an error.",
            action: { shouldModify: false },
            risks: [],
        };
    }
}
/* -----------------------------------------
Helper: slice code for semantic analysis
-------------------------------------------- */
/**
 * Slice overly long source code into a prompt-safe excerpt.
 *
 * Keeps the head (60% of the budget) and tail (40%) of the file with an
 * explicit truncation marker in between, so the model sees both the
 * imports/setup at the top and the exports/helpers at the bottom.
 *
 * @param {string | undefined} code - Raw file contents (may be empty/undefined).
 * @param {number} [maxChars=MAX_CODE_CHARS] - Character budget for the excerpt;
 *   generalized from the previously hard-coded module constant (default keeps
 *   existing callers' behavior unchanged).
 * @returns {string | undefined} The code unchanged when it fits the budget,
 *   a head+marker+tail excerpt when it doesn't, or undefined for empty input.
 */
function sliceCodeForAnalysis(code, maxChars = MAX_CODE_CHARS) {
    if (!code)
        return undefined;
    if (code.length <= maxChars)
        return code;
    const head = code.slice(0, Math.floor(maxChars * 0.6));
    const tail = code.slice(-Math.floor(maxChars * 0.4));
    return [
        head,
        "\n/* … file truncated for semantic analysis … */\n",
        tail,
    ].join("");
}
/* -----------------------------------------
Helper: analyzeCombined
Only for relevant files, concise
-------------------------------------------- */
/**
 * Produce a concise cross-file analysis from the per-file results that were
 * judged relevant to the query.
 *
 * Sends the per-file JSON through `generate` → `cleanupModule` and returns
 * the parsed summary; any failure along the way yields a neutral
 * "[unparsed]" fallback instead of throwing.
 *
 * @param fileAnalysis - Map of file path → per-file analysis (relevant files only).
 * @param query - The user's natural-language query.
 * @returns `{ sharedPatterns, architectureSummary, hotspots }`.
 */
async function analyzeCombined(fileAnalysis, query) {
    // Neutral result used when the model output is missing or unparseable.
    const fallback = {
        sharedPatterns: [],
        architectureSummary: "[unparsed]",
        hotspots: [],
    };
    const prompt = `
You are given per-file semantic analysis relevant to a user query.
User query:
"${query}"
Per-file analysis:
${JSON.stringify(fileAnalysis, null, 2)}
Task:
- Identify shared patterns across these files
- Summarize high-level architecture relevant to the query
- List any cross-cutting hotspots or risks
Return STRICT JSON:
{
"sharedPatterns"?: string[],
"architectureSummary"?: string,
"hotspots"?: string[]
}
`.trim();
    try {
        const ai = await generate({ query: "cross-file analysis", content: prompt });
        const cleaned = await cleanupModule.run({ query, content: ai.data });
        let parsed;
        if (cleaned.data && typeof cleaned.data === "object") {
            parsed = cleaned.data;
        }
        else {
            // May throw on non-JSON text; the outer catch handles that.
            parsed = JSON.parse(String(cleaned.content ?? "{}"));
        }
        return {
            sharedPatterns: parsed.sharedPatterns ?? fallback.sharedPatterns,
            architectureSummary: parsed.architectureSummary ?? fallback.architectureSummary,
            hotspots: parsed.hotspots ?? fallback.hotspots,
        };
    }
    catch (err) {
        console.warn("[semanticAnalysisStep] Failed to parse combined analysis:", err);
        return fallback;
    }
}