UNPKG

scai

Version:

> **AI-powered CLI for local code analysis, commit message suggestions, and natural-language queries.**
> **100% local • No token cost • Private by design • GDPR-friendly** — made in Denmark/EU with ❤️.

259 lines (248 loc) 10.2 kB
// File: src/modules/semanticAnalysisStep.ts import { logInputOutput } from "../utils/promptLogHelper.js"; import { generate } from "../lib/generate.js"; import { cleanupModule } from "../pipeline/modules/cleanupModule.js"; const MAX_CODE_CHARS = 6000; // conservative prompt-safe limit export const semanticAnalysisStep = { name: "semanticAnalysis", description: "Query-centric semantic analysis: per-file relevance and optional combined insights.", groups: ["analysis"], run: async (input) => { var _a, _b; const context = input.context; if (!context) throw new Error("[semanticAnalysisStep] No context provided"); const workingFiles = context.workingFiles ?? []; if (!workingFiles.length) { const notes = "[semanticAnalysisStep] No working files loaded; skipping semantic analysis"; logInputOutput("semanticAnalysisStep", "output", { notes }); return { query: input.query, data: { notes }, context }; } context.analysis || (context.analysis = {}); (_a = context.analysis).fileAnalysis || (_a.fileAnalysis = {}); (_b = context.analysis).combinedAnalysis || (_b.combinedAnalysis = {}); const focusFiles = new Set(context.analysis.focus?.relevantFiles ?? 
[]); const filesToAnalyze = workingFiles.filter(f => focusFiles.size === 0 || focusFiles.has(f.path)); // ---------------------------- // 1️⃣ Per-file semantic analysis // ---------------------------- for (const file of filesToAnalyze) { const filePath = file.path; // Preserve existing caching behavior if (context.analysis.fileAnalysis[filePath]) continue; const fileAnalysis = await analyzeFile(file, input.query); context.analysis.fileAnalysis[filePath] = fileAnalysis; logInputOutput("semanticAnalysisStep - per-file", "output", { file: filePath, analysis: fileAnalysis, }); } // ---------------------------- // 2️⃣ Cross-file combined analysis // Only use files marked as relevant // Skip if ≤ 2 relevant files // ---------------------------- const relevantFileAnalysis = Object.fromEntries(Object.entries(context.analysis.fileAnalysis) .filter(([_, fa]) => fa.intent && fa.intent !== "irrelevant")); let combinedAnalysis = { sharedPatterns: [], architectureSummary: "[skipped]", hotspots: [], }; if (Object.keys(relevantFileAnalysis).length > 2) { combinedAnalysis = await analyzeCombined(relevantFileAnalysis, input.query); context.analysis.combinedAnalysis = combinedAnalysis; logInputOutput("semanticAnalysisStep - combined", "output", combinedAnalysis); } else { context.analysis.combinedAnalysis = combinedAnalysis; } return { query: input.query, data: { notes: "Query-centric semantic analysis completed" }, context, }; }, }; async function analyzeFile(file, query, context) { const slicedCode = sliceCodeForAnalysis(file.code); // Gather focus/context info from preFileSearchCheckStep and problem understanding const focus = context?.analysis?.focus; const understanding = context?.analysis?.understanding; const rationaleSnippet = focus?.rationale ? `Rationale: ${focus.rationale}` : ""; const assumptionsSnippet = understanding?.assumptions ? `Assumptions: ${understanding.assumptions.join("; ")}` : ""; const constraintsSnippet = understanding?.constraints ? 
`Constraints: ${understanding.constraints.join("; ")}` : ""; const risksSnippet = understanding?.risks ? `Known risks: ${understanding.risks.join("; ")}` : ""; const contextSnippet = [rationaleSnippet, assumptionsSnippet, constraintsSnippet, risksSnippet] .filter(Boolean) .join("\n"); const prompt = ` You are analyzing a single file in the context of a user query. User query: "${query}" Context from pre-file selection and problem understanding (use this to guide your relevance judgment): ${contextSnippet} Important: - You may be seeing a PARTIAL view of the file due to size limits. - Decide relevance based on the available content, file path, and naming. Your task: 1. Decide whether this file is relevant to answering the query. 2. If irrelevant: - Set intent to "irrelevant" - Briefly explain why in the relevance field - Explicitly state that the file should NOT be modified 3. If relevant: - Set intent to "relevant" - Assign a role: "primary", "supporting", or "contextual" - Explain in 12 sentences how this file contributes to the query - Explicitly state whether this file should be modified to satisfy the query - Describe what changes (if any) should be made in this file (describe WHAT and WHY, not HOW) Return STRICT JSON with this shape: { "intent": "relevant" | "irrelevant", "relevance": string, "role"?: "primary" | "supporting" | "contextual", "action"?: { "shouldModify": boolean }, "proposedChanges"?: { "summary": string, "scope": "none" | "minor" | "moderate" | "major", "targets"?: string[], "rationale"?: string }, "risks"?: string[] } File path: ${file.path} Code excerpt: ${slicedCode ?? "[no code]"} `.trim(); try { const ai = await generate({ query: file.path, content: prompt }); const cleaned = await cleanupModule.run({ query, content: ai.data }); let data; if (typeof cleaned.data === "object" && cleaned.data) { data = cleaned.data; } else { try { data = JSON.parse(String(cleaned.content ?? 
"{}")); } catch { console.warn(` - [semanticAnalysisStep] Non-JSON output for ${file.path}, defaulting to irrelevant`); data = {}; } } const intent = data.intent === "relevant" || data.intent === "irrelevant" ? data.intent : "irrelevant"; const shouldModify = intent === "relevant" && data.action?.shouldModify === true && data.proposedChanges?.scope && data.proposedChanges.scope !== "none"; return { intent, relevance: typeof data.relevance === "string" && data.relevance.trim() ? data.relevance : intent === "relevant" ? "This file appears relevant to the query." : "This file does not appear relevant to answering the query.", role: intent === "relevant" && (data.role === "primary" || data.role === "supporting" || data.role === "contextual") ? data.role : undefined, action: { shouldModify }, proposedChanges: intent === "relevant" && data.proposedChanges ? { summary: String(data.proposedChanges.summary ?? ""), scope: data.proposedChanges.scope ?? "none", targets: Array.isArray(data.proposedChanges.targets) ? data.proposedChanges.targets : undefined, rationale: typeof data.proposedChanges.rationale === "string" ? data.proposedChanges.rationale : undefined, } : { summary: "No changes are required in this file for the given query.", scope: "none" }, risks: Array.isArray(data.risks) ? 
data.risks : [], }; } catch (err) { console.warn(` - [semanticAnalysisStep] Failed to analyze file ${file.path}:`, err); return { intent: "irrelevant", relevance: "This file could not be analyzed due to an error.", action: { shouldModify: false }, risks: [], }; } } /* ----------------------------------------- Helper: slice code for semantic analysis -------------------------------------------- */ function sliceCodeForAnalysis(code) { if (!code) return undefined; if (code.length <= MAX_CODE_CHARS) return code; const head = code.slice(0, Math.floor(MAX_CODE_CHARS * 0.6)); const tail = code.slice(-Math.floor(MAX_CODE_CHARS * 0.4)); return [ head, "\n/* … file truncated for semantic analysis … */\n", tail, ].join(""); } /* ----------------------------------------- Helper: analyzeCombined Only for relevant files, concise -------------------------------------------- */ async function analyzeCombined(fileAnalysis, query) { const prompt = ` You are given per-file semantic analysis relevant to a user query. User query: "${query}" Per-file analysis: ${JSON.stringify(fileAnalysis, null, 2)} Task: - Identify shared patterns across these files - Summarize high-level architecture relevant to the query - List any cross-cutting hotspots or risks Return STRICT JSON: { "sharedPatterns"?: string[], "architectureSummary"?: string, "hotspots"?: string[] } `.trim(); try { const ai = await generate({ query: "cross-file analysis", content: prompt }); const cleaned = await cleanupModule.run({ query, content: ai.data }); const data = typeof cleaned.data === "object" && cleaned.data ? cleaned.data : JSON.parse(String(cleaned.content ?? "{}")); return { sharedPatterns: data.sharedPatterns ?? [], architectureSummary: data.architectureSummary ?? "[unparsed]", hotspots: data.hotspots ?? [], }; } catch (err) { console.warn("[semanticAnalysisStep] Failed to parse combined analysis:", err); return { sharedPatterns: [], architectureSummary: "[unparsed]", hotspots: [], }; } }