scai
Version:
> **AI-powered CLI for local code analysis, commit message suggestions, and natural-language queries.** 100% local, private, GDPR-friendly, made in Denmark/EU with ❤️.
145 lines (139 loc) • 5.93 kB
JavaScript
// File: src/modules/semanticAnalysisStep.ts
import { logInputOutput } from "../utils/promptLogHelper.js";
import { generate } from "../lib/generate.js";
import { cleanupModule } from "../pipeline/modules/cleanupModule.js";
/**
 * Pipeline step: query-centric semantic analysis.
 *
 * `run(input)` reads `input.context.workingFiles`, stores one analysis per
 * file under `context.analysis.fileAnalysis[path]`, and stores a cross-file
 * summary under `context.analysis.combinedAnalysis`.
 *
 * - Throws when `input.context` is missing.
 * - Returns early (leaving `context` untouched) when there are no working files.
 * - Only files listed in `context.analysis.focus.relevantFiles` are analysed;
 *   when that list is empty or absent, every working file is analysed.
 * - Files that already have an entry in `fileAnalysis` are not re-analysed.
 * - The combined analysis only runs when more than two entries have an intent
 *   other than "irrelevant"; otherwise a "[skipped]" placeholder is stored.
 *
 * Resolves to `{ query, data: { notes }, context }`.
 */
export const semanticAnalysisStep = {
    name: "semanticAnalysis",
    description: "Query-centric semantic analysis: per-file relevance and optional combined insights.",
    groups: ["analysis"],
    run: async (input) => {
        const { context } = input;
        if (!context) {
            throw new Error("[semanticAnalysisStep] No context provided");
        }

        const workingFiles = context.workingFiles ?? [];
        if (workingFiles.length === 0) {
            const notes = "[semanticAnalysisStep] No working files loaded; skipping semantic analysis";
            logInputOutput("semanticAnalysisStep", "output", { notes });
            return { query: input.query, data: { notes }, context };
        }

        // Make sure the containers written to below exist on the shared context.
        if (!context.analysis) {
            context.analysis = {};
        }
        const analysis = context.analysis;
        if (!analysis.fileAnalysis) {
            analysis.fileAnalysis = {};
        }
        if (!analysis.combinedAnalysis) {
            analysis.combinedAnalysis = {};
        }

        // An empty focus set means "no restriction": every working file is a candidate.
        const focusPaths = new Set(analysis.focus?.relevantFiles ?? []);
        const candidates = workingFiles.filter((candidate) => focusPaths.size === 0 || focusPaths.has(candidate.path));

        // 1) Per-file relevance analysis, one file at a time; existing entries are reused.
        for (const candidate of candidates) {
            const path = candidate.path;
            if (!analysis.fileAnalysis[path]) {
                const result = await analyzeFile(candidate, input.query);
                analysis.fileAnalysis[path] = result;
                logInputOutput("semanticAnalysisStep - per-file", "output", { file: path, analysis: result });
            }
        }

        // 2) Cross-file combined analysis over entries whose intent is set and not "irrelevant".
        const relevantEntries = Object.entries(analysis.fileAnalysis)
            .filter(([, entry]) => entry.intent && entry.intent !== "irrelevant");

        if (relevantEntries.length <= 2) {
            // Two or fewer relevant files: store the placeholder instead of calling the model.
            analysis.combinedAnalysis = { sharedPatterns: [], architectureSummary: "[skipped]", hotspots: [] };
        }
        else {
            const combined = await analyzeCombined(Object.fromEntries(relevantEntries), input.query);
            analysis.combinedAnalysis = combined;
            logInputOutput("semanticAnalysisStep - combined", "output", combined);
        }

        return {
            query: input.query,
            data: { notes: "Query-centric semantic analysis completed" },
            context,
        };
    },
};
/**
 * Ask the model whether a single file is relevant to the user query.
 *
 * Builds a prompt from the query, `file.path` and `file.code`, sends it through
 * `generate`, then passes `ai.data` through `cleanupModule.run`. A structured
 * `cleaned.data` object is preferred; otherwise `cleaned.content` is parsed as JSON.
 *
 * The result is always normalised so the caller's exact `"irrelevant"` comparison
 * is reliable: a missing, non-string or blank `intent`, or any case/whitespace
 * variant of "irrelevant", becomes the literal `"irrelevant"`; other intents are
 * returned trimmed. `risks` is kept only when it is an array.
 *
 * Never rejects: any failure is logged with `console.warn` and reported as
 * `{ intent: "irrelevant", risks: [] }`.
 *
 * @param {{ path: string, code?: string }} file - File to judge; `code` falls back to "[no code]" in the prompt.
 * @param {string} query - The user query the file is judged against.
 * @returns {Promise<{ intent: string, risks: Array }>} Normalised per-file analysis.
 */
async function analyzeFile(file, query) {
    const prompt = `
You are given a user query:
"${query}"
Task:
- Determine if an analysis of this file is relevant to answering the query.
- If relevant, describe in 1-2 sentences how it contributes to answering the query.
- If irrelevant, set intent to "irrelevant".
- Optionally, include any risks that may affect the query's outcome.
Return STRICT JSON:
{
"intent": string,
"risks"?: string[]
}
File path: ${file.path}
Code snippet: ${file.code ?? "[no code]"}
`;
    try {
        const ai = await generate({ query: file.path, content: prompt });
        const cleaned = await cleanupModule.run({ query, content: ai.data });
        let data;
        // Attempt to use structured data if available
        if (typeof cleaned.data === "object" && cleaned.data) {
            data = cleaned.data;
        }
        else {
            try {
                data = JSON.parse(String(cleaned.content ?? "{}"));
            }
            catch {
                console.warn(`[semanticAnalysisStep] Non-JSON output for ${file.path}, defaulting to irrelevant`);
                data = { intent: "irrelevant", risks: [] };
            }
        }
        // `data` may be null or a primitive (e.g. the model answered `null` or `5`);
        // optional chaining turns those into "no fields" instead of a TypeError.
        const rawIntent = typeof data?.intent === "string" ? data.intent.trim() : "";
        // Collapse blank values and case variants onto the exact sentinel the caller filters on.
        const isIrrelevant = rawIntent === "" || rawIntent.toLowerCase() === "irrelevant";
        return {
            intent: isIrrelevant ? "irrelevant" : rawIntent,
            risks: Array.isArray(data?.risks) ? data.risks : [],
        };
    }
    catch (err) {
        console.warn(`[semanticAnalysisStep] Failed to analyze file ${file.path}:`, err);
        return { intent: "irrelevant", risks: [] };
    }
}
/* -----------------------------------------
Helper: analyzeCombined
Only for relevant files, concise
-------------------------------------------- */
/**
 * Ask the model for a cross-file summary of the per-file analyses.
 *
 * Serialises `fileAnalysis` into the prompt, sends it through `generate`, then
 * passes `ai.data` through `cleanupModule.run`. A structured `cleaned.data`
 * object is preferred; otherwise `cleaned.content` is parsed as JSON.
 *
 * Each field is shape-checked before it is returned, so a malformed model
 * answer cannot leak a wrongly typed value to the caller: `sharedPatterns` and
 * `hotspots` fall back to `[]` unless they are arrays, and
 * `architectureSummary` falls back to "[unparsed]" unless it is a string.
 *
 * Never rejects: any failure is logged with `console.warn` and reported as the
 * "[unparsed]" defaults.
 *
 * @param {Object} fileAnalysis - Map of file path to per-file analysis, embedded in the prompt as JSON.
 * @param {string} query - The user query; forwarded to `cleanupModule.run`.
 * @returns {Promise<{ sharedPatterns: Array, architectureSummary: string, hotspots: Array }>}
 */
async function analyzeCombined(fileAnalysis, query) {
    const prompt = `
You are given per-file analysis relevant to the query:
${JSON.stringify(fileAnalysis, null, 2)}
Task:
- Identify shared patterns across these files in the context of the query
- Summarize high-level architecture relevant to answering the query
- List any hotspots or risks that could impact the query's outcome
Return STRICT JSON:
{
"sharedPatterns"?: string[],
"architectureSummary"?: string,
"hotspots"?: string[]
}
`;
    try {
        const ai = await generate({ query: "cross-file analysis", content: prompt });
        const cleaned = await cleanupModule.run({ query, content: ai.data });
        const data = typeof cleaned.data === "object" && cleaned.data
            ? cleaned.data
            : JSON.parse(String(cleaned.content ?? "{}"));
        // `data` may be null or a primitive after JSON.parse; optional chaining
        // makes those read as "no fields" so the defaults below apply.
        const sharedPatterns = data?.sharedPatterns;
        const architectureSummary = data?.architectureSummary;
        const hotspots = data?.hotspots;
        return {
            sharedPatterns: Array.isArray(sharedPatterns) ? sharedPatterns : [],
            architectureSummary: typeof architectureSummary === "string" ? architectureSummary : "[unparsed]",
            hotspots: Array.isArray(hotspots) ? hotspots : [],
        };
    }
    catch (err) {
        console.warn("[semanticAnalysisStep] Failed to parse combined analysis:", err);
        return { sharedPatterns: [], architectureSummary: "[unparsed]", hotspots: [] };
    }
}