UNPKG

scai

Version:

> **A local-first AI CLI for understanding, querying, and iterating on large codebases.**
> **100% local • No token costs • No cloud • No prompt injection • Private by design**

github.com/rzs/scai

1,047 lines • 78 kB

JavaScript

import { builtInModules } from "../pipeline/registry/moduleRegistry.js"; import { logInputOutput } from "../utils/promptLogHelper.js"; import { infoPlanGenStep } from "./infoPlanGenStep.js"; import { understandIntentStep } from "./understandIntentStep.js"; import { transformPlanGenStep } from "./transformPlanGenStep.js"; import { getDbForRepo } from "../db/client.js"; import { writeFileStep } from "./writeFileStep.js"; import { resolveExecutionModeStep } from "./resolveExecutionModeStep.js"; import { fileCheckStep } from "./fileCheckStep.js"; import { analysisPlanGenStep } from "./analysisPlanGenStep.js"; import { readinessGateStep } from "./readinessGateStep.js"; import { scopeClassificationStep } from "./scopeClassificationStep.js"; import { routingDecisionStep } from "./routingDecisionStep.js"; import { evidenceVerifierStep } from "./evidenceVerifierStep.js"; import { validateChangesStep } from './validateChangesStep.js'; import { reasonNextTaskStep } from './reasonNextTaskStep.js'; import { collaboratorStep } from './collaboratorStep.js'; import { integrateFeedbackStep } from './integrateFeedbackStep.js'; import { researchPlanGenStep } from "./researchPlanGenStep.js"; import { selectRelevantSourcesStep } from "./selectRelevantSourcesStep.js"; import { iterationFileSelector } from "./iterationFileSelector.js"; import { finalAnswerModule } from "../pipeline/modules/finalAnswerModule.js"; import { reasonNextStep } from "./reasonNextStep.js"; import { buildLightContext } from "../utils/buildContextualPrompt.js"; import { semanticSearchFiles } from "../db/fileIndex.js"; import { NUM_TOPFILES, RELATED_FILES_LIMIT } from "../constants.js"; import { structuralPreloadStep } from "./structuralPreloadStep.js"; import { extractFileReferences } from "../utils/extractFileReferences.js"; import { PREFILTER_STOP_WORDS } from "../fileRules/stopWords.js"; import { MAX_WELL_KNOWN_REPO_FILES, WELL_KNOWN_REPO_FILE_BASENAMES } from "../fileRules/wellKnownRepoFiles.js"; import chalk 
from "chalk"; import path from "path"; import fs from "fs"; /* ───────────────────────── registry ───────────────────────── */ const MODULE_REGISTRY = Object.fromEntries(Object.entries(builtInModules).map(([name, mod]) => [name, mod])); function resolveModuleForAction(action) { return MODULE_REGISTRY[action]; } /* ───────────────────────── agent ───────────────────────── */ /** * MainAgent is the core orchestrator of the SCAI agent system. * It manages the execution flow, coordinates steps, and handles context management. * * The agent follows a multi-phase execution model: * 1. Boot: Determine intent and execution mode * 2. Precheck: File existence validation * 3. Scope: Determine where to act and what actions are allowed * 4. Grounding & Readiness Loop: Acquire evidence and verify readiness * 5. Analysis: Perform in-depth analysis * 6. Transform: Generate and execute transformations * 7. Finalize: Complete the task and persist data */ export class MainAgent { /** * Creates a new MainAgent instance. * * @param context - The structured context for the agent. * @param ui - The UI interface for agent communication. */ constructor(context, ui) { this.runCount = 0; this.context = context; this.query = context.initContext?.userQuery ?? 
""; this.ui = ui; } /* ───────────── main run ───────────── */ async run() { try { this.runCount = 0; await this.runBoot(); await this.runScope(); await this.runSearch(); await this.runVerify(); await this.runResearch(); const canProceedToExecution = this.isResearchGateSatisfied(); if (canProceedToExecution) { await this.runPlan(); await this.runWorkLoop(); } await this.runFinalize(); } finally { this.ui.stop(); // ← guaranteed cleanup } } /* ───────────── boot ───────────── */ async runBoot() { var _a; await understandIntentStep.run({ context: this.context }); // Boot the task and get the real DB taskId this.taskId = bootTaskForRepo(this.context, getDbForRepo(), (phase, step, ms, desc) => this.logLine(phase, step, ms, desc, { highlight: true })); (_a = this.context).task || (_a.task = { id: this.taskId, projectId: 0, status: "active", initialQuery: this.context.initContext?.userQuery ?? "", createdAt: new Date().toISOString(), updatedAt: new Date().toISOString(), taskSteps: [], }); this.context.task.id = this.taskId; this.logLine("TASK", "Boot complete", undefined, `taskId=${this.taskId}`, { highlight: true }); } /* ───────────── scope ───────────── */ async runScope() { await scopeClassificationStep.run(this.context); await resolveExecutionModeStep.run(this.context); await routingDecisionStep.run(this.context); const routing = this.context.analysis?.routingDecision; if (routing) { this.logLine("TASK", "Routing decision", undefined, `${routing.decision} | search=${routing.allowSearch} | research=${routing.allowResearch} | transform=${routing.allowTransform} | scopeLocked=${routing.scopeLocked}`); } this.logLine("TASK", "Scope classification complete"); } /* ───────────── search ───────────── */ /** * Seeds initial candidate files using semantic retrieval + deterministic prefilter. * Example: query mentions "MainAgent" -> relatedFiles are narrowed before grounding. 
*/ async runSearch() { const { rawUserQuery, retrievalQuery } = this.resolveInitialRetrievalQueries(); const t = this.startTimer(); try { const results = await this.fetchInitialRetrievalResults(retrievalQuery); const promptArgs = this.buildInitialRetrievalPromptArgs(results, retrievalQuery); const seededContext = await buildLightContext(promptArgs); const mergedRelatedCount = this.mergeSeededInitialContext(rawUserQuery, seededContext); const prefilter = this.applyDeterministicPreGroundingPrefilter(retrievalQuery); const repoDefaults = this.injectWellKnownRepoFiles(prefilter.after); this.logLine("ANALYSIS", "initialRetrieval", t(), `${results.length} result(s), ${mergedRelatedCount} candidate file(s), prefilter ${prefilter.before} -> ${prefilter.after}, defaults +${repoDefaults.added} (${repoDefaults.reason})`); } catch (err) { this.logLine("ANALYSIS", "initialRetrieval", t(), `failed: ${String(err)}`); } } /* ───────────── verify ───────────── */ /** * Wave-based verify loop (evidence -> readiness -> optional info acquisition). * Example: if readiness stays not-ready, run an info plan and try another wave. 
*/ async runVerify() { let ready = false; const maxGroundingWaves = this.getGroundingWaveBudget(); let groundingWave = 0; let stagnantWaves = 0; const MAX_STAGNANT_WAVES = 2; while (groundingWave < maxGroundingWaves) { groundingWave++; this.pruneMissingVerifyPaths(); this.logLine("ANALYSIS", "groundingWave", undefined, `wave ${groundingWave}/${maxGroundingWaves}`); const beforeFocus = this.captureVerifyFocusSnapshot(); // ---------------- EVIDENCE PIPELINE ---------------- // -------- STRUCTURAL PRELOAD -------- const t0 = this.startTimer(); await structuralPreloadStep.run({ query: this.query, context: this.context }); this.logLine("ANALYSIS", "structuralPreload", t0()); const t1 = this.startTimer(); await evidenceVerifierStep.run({ query: this.query, context: this.context }); this.logLine("ANALYSIS", "collectAnalysisEvidence", t1()); const t2 = this.startTimer(); await fileCheckStep(this.context); this.logLine("ANALYSIS", "fileCheckStep", t2()); const t3 = this.startTimer(); await selectRelevantSourcesStep.run({ query: this.query, context: this.context }); this.logLine("ANALYSIS", "selectRelevantSources", t3()); // ---------------- READINESS GATE ---------------- const t4 = this.startTimer(); await readinessGateStep.run(this.context); this.logLine("ANALYSIS", "readinessGate", t4()); ready = this.context.analysis?.readiness?.decision === "ready"; if (ready) { break; } // ---------------- INFORMATION ACQUISITION ---------------- const canRouteSearchExpansion = this.canExecuteRoute("search-expand"); if (!canRouteSearchExpansion) { this.logLine("PLAN", "infoPlanGen", undefined, "skipped (routing disallows search expansion)", { highlight: false }); } else if (this.canExecutePhase("planning") && this.canExecuteScope("planning")) { const t = this.startTimer(); await infoPlanGenStep.run(this.context); const infoPlan = this.context.analysis?.planSuggestion?.plan ?? 
{ steps: [] }; for (const step of infoPlan.steps) { const stepIO = { query: this.query }; await this.executeStep(step, stepIO); } this.logLine("PLAN", "infoPlanGen", t(), undefined, { highlight: false }); } const afterFocus = this.captureVerifyFocusSnapshot(); const hasFocusGrowth = this.logVerifyFocusDelta(beforeFocus, afterFocus); stagnantWaves = hasFocusGrowth ? 0 : stagnantWaves + 1; if (this.shouldStopVerifyForSaturation(stagnantWaves, MAX_STAGNANT_WAVES)) break; this.logLine("HASINFO", "Not ready — looping back to evidence collection", undefined, undefined, { highlight: false }); } // Grounding is the phase boundary that decides whether execution may start. if (!this.isWorkLoopReady()) return; this.ensureTaskForWorkLoop(); this.recalibrateRoutingAfterVerify(); // Research gate is evaluated after runResearch() in run(). } /* ───────────── research ───────────── */ /** * Seeds explicit research task steps for complex repo-wide lanes. * Example: enqueue research-impact-map, research-symbol-trace, and research-risk-check. */ async runResearch() { var _a, _b; if (!this.canExecuteRoute("research")) { this.logLine("RESEARCH", "taskStepSeed", undefined, "skipped (route disallows research)"); return; } if (!this.context.task) return; (_a = this.context.task).taskSteps || (_a.taskSteps = []); await researchPlanGenStep.run(this.context); const generatedSteps = (this.context.analysis?.planSuggestion?.plan?.steps ?? []) .filter(step => typeof step.action === "string" && step.action.startsWith("research-")) .map(step => { const action = step.action; const defaultFilePath = action === "research-impact-map" ? "__research__/impact-map" : action === "research-symbol-trace" ? "__research__/symbol-trace" : action === "research-risk-check" ? 
"__research__/risk-check" : "__research__/architecture-synthesis"; return { action, filePath: step.targetFile || defaultFilePath, notes: step.description || `Run ${step.action}`, }; }); const fallbackResearchSteps = [ { action: "research-impact-map", filePath: "__research__/impact-map", notes: "Map cross-file impact before code changes.", }, { action: "research-symbol-trace", filePath: "__research__/symbol-trace", notes: "Trace key symbols across related files.", }, { action: "research-risk-check", filePath: "__research__/risk-check", notes: "Record risks, assumptions, and constraints before edits.", }, { action: "research-architecture-synthesis", filePath: "__research__/architecture-synthesis", notes: "Synthesize architecture summary, shared patterns, hotspots, and coupling points.", }, ]; const researchSteps = generatedSteps.length > 0 ? generatedSteps : fallbackResearchSteps; let seededCount = 0; for (const step of researchSteps) { const exists = this.context.task.taskSteps.some(s => s.filePath === step.filePath && s.action === step.action); if (exists) continue; this.context.task.taskSteps.push({ taskId: this.context.task.id, filePath: step.filePath, action: step.action, status: "pending", notes: step.notes, result: { phase: "research", seededBy: "runResearch" }, }); seededCount++; } const plannedResearchSteps = this.context.task.taskSteps .filter(s => typeof s.action === "string" && s.action.startsWith("research-")) .map(s => ({ action: s.action, filePath: s.filePath, status: s.status, notes: s.notes, })); logInputOutput("runResearch", "output", { source: generatedSteps.length > 0 ? "generated" : "fallback", seededCount, totalResearchSteps: plannedResearchSteps.length, steps: plannedResearchSteps, }); (_b = this.context).analysis || (_b.analysis = {}); this.context.analysis.planSuggestion = undefined; this.logLine("RESEARCH", "taskStepSeed", undefined, `${seededCount} research step(s) added (${generatedSteps.length > 0 ? 
"generated" : "fallback"})`); } /* ───────────── plan ───────────── */ /** * Seeds ordered execution task steps from selected files + research/verify artifacts. * Example: prioritize files that are both selected and research-touched. */ async runPlan() { var _a, _b; if (!this.context.task) return; if (!this.canExecutePhase("planning") || !this.canExecuteScope("planning")) return; (_a = this.context).analysis || (_a.analysis = {}); (_b = this.context.task).taskSteps || (_b.taskSteps = []); const existingExecutionPaths = new Set(this.context.task.taskSteps .filter(step => !!step.filePath && !step.filePath.startsWith("__research__/")) .map(step => step.filePath)); const selectedFiles = this.context.analysis.focus?.selectedFiles ?? []; const touchedFromResearch = this.context.analysis.researchArtifacts?.touchedFiles ?? []; const route = this.context.analysis.routingDecision; const useFocusedSelectedPlanOnly = this.context.analysis.readiness?.decision === "ready" && (route?.decision === "has-info") && (route?.scopeLocked ?? false) && (route?.allowSearch === false) && selectedFiles.length > 0; const verifyMinConfidence = this.getVerifyConfidenceThresholdForPlan(); const verifyEntries = Object.entries(this.context.analysis.verify?.byFile ?? {}); const verifyRelevantFiles = verifyEntries .filter(([_, verify]) => verify?.isRelevant && (verify.fileConfidence ?? 0) >= verifyMinConfidence) .map(([filePath]) => filePath); const verifySkippedLowConfidenceCount = verifyEntries.filter(([_, verify]) => !!verify?.isRelevant && (verify.fileConfidence ?? 
0) < verifyMinConfidence).length; const rankPath = (filePath) => { const inSelected = selectedFiles.includes(filePath); const inResearchTouched = touchedFromResearch.includes(filePath); const inVerify = verifyRelevantFiles.includes(filePath); if (inSelected && inResearchTouched) return 0; if (inSelected) return 1; if (inResearchTouched) return 2; if (inVerify) return 3; return 4; }; const plannedPathsSource = useFocusedSelectedPlanOnly ? Array.from(new Set(selectedFiles)) : Array.from(new Set([ ...selectedFiles, ...touchedFromResearch, ...verifyRelevantFiles, ])); const plannedPaths = plannedPathsSource .filter(filePath => !!filePath && !filePath.startsWith("__research__/") && fs.existsSync(filePath)) .sort((a, b) => rankPath(a) - rankPath(b)) .slice(0, 16); let seededCount = 0; const seeded = []; for (const filePath of plannedPaths) { if (existingExecutionPaths.has(filePath)) continue; const rank = rankPath(filePath); const notes = rank === 0 ? "Plan priority: selected + research-touched" : rank === 1 ? "Plan priority: selected file" : rank === 2 ? "Plan priority: research-touched file" : "Plan priority: verify-relevant file"; this.context.task.taskSteps.push({ taskId: this.context.task.id, filePath, status: "pending", notes, result: { phase: "plan", seededBy: "runPlan", priorityRank: rank, }, }); seeded.push({ filePath, rank, notes }); seededCount++; } logInputOutput("runPlan", "output", { seededCount, totalPlannedPaths: plannedPaths.length, selectedFileCount: selectedFiles.length, researchTouchedCount: touchedFromResearch.length, verifyRelevantCount: verifyRelevantFiles.length, focusedSelectedOnly: useFocusedSelectedPlanOnly, verifyMinConfidence, verifySkippedLowConfidenceCount, seeded, }); this.logLine("PLAN", "taskStepSeed", undefined, `${seededCount} execution step(s) planned`); } /** * Sets minimum verify confidence before a file can be plan-seeded from verify-only signal. * Example: single-file lanes require higher confidence than repo-wide lanes. 
*/ getVerifyConfidenceThresholdForPlan() { const scope = this.context.analysis?.scopeType ?? "repo-wide"; if (scope === "single-file") return 0.45; if (scope === "multi-file") return 0.35; return 0.3; } /** * Re-routes after verify when evidence converges on selected files with high confidence. * Example: selected files strongly verified => disable expansion/research and lock focused execution. */ recalibrateRoutingAfterVerify() { var _a; (_a = this.context).analysis || (_a.analysis = {}); const routing = this.context.analysis.routingDecision; if (!routing) return; const selectedFiles = this.context.analysis.focus?.selectedFiles ?? []; if (selectedFiles.length === 0) return; const readinessConfidence = this.context.analysis.readiness?.confidence ?? 0; const intentConfidence = this.context.analysis.intent?.confidence ?? 0; const minFileConfidence = 0.28; const strongSelected = selectedFiles.filter(filePath => { const verify = this.context.analysis?.verify?.byFile?.[filePath]; return verify?.isRelevant === true && (verify.fileConfidence ?? 
0) >= minFileConfidence; }); const convergedSingle = selectedFiles.length === 1 && strongSelected.length === 1 && readinessConfidence >= 0.9 && intentConfidence >= 0.8; const convergedMulti = selectedFiles.length >= 2 && strongSelected.length >= 2 && readinessConfidence >= 0.9 && intentConfidence >= 0.75; if (!convergedSingle && !convergedMulti) return; routing.decision = "has-info"; routing.allowSearch = false; routing.allowResearch = false; routing.scopeLocked = true; routing.rationale = `${routing.rationale}; postVerify=focused-selection(${strongSelected.length})`; this.logLine("TASK", "Routing recalibrated", undefined, `focused=${selectedFiles.length} selected, strong=${strongSelected.length}`); } /* ───────────── work loop ───────────── */ async runWorkLoop() { if (this.context.task.status !== "active") return; this.ensureTaskForWorkLoop(); const MAX_TASK_STEPS = this.getTaskStepBudget(); let stepCount = 0; while (stepCount < MAX_TASK_STEPS && this.context.task.status === "active") { const nextAction = await this.resolveNextTaskAction(); if (nextAction === "request-feedback") { this.persistTaskStatus("paused"); this.logLine("TASK", "Execution paused — awaiting user clarification", undefined, undefined, { highlight: false }); return; } if (nextAction === "complete") { this.persistTaskStatus("completed"); this.logLine("TASK", "All selected files processed — task complete", undefined, undefined, { highlight: false }); return; } const taskStep = await iterationFileSelector.run(this.context); if (!taskStep) { this.persistTaskStatus("completed"); this.logLine("TASK", "No eligible taskStep found — task complete", undefined, undefined, { highlight: false }); return; } stepCount++; this.startTaskStep(taskStep, stepCount); // --------------------------- // Step-level iterations // --------------------------- const stepAction = await this.runStepIterations(taskStep); this.finishTaskStep(taskStep, stepCount, stepAction); } this.logLine("TASK", "Max task step limit reached 
— stopping work loop", undefined, undefined, { highlight: false }); } /* ───────────── finalize ───────────── */ async runFinalize() { await finalAnswerModule.run({ query: this.query, context: this.context }); persistTaskData(this.context, this.taskId, getDbForRepo(), this.logLine.bind(this)); this.logLine("TASK", "Finalize complete", undefined, undefined, { highlight: false }); } /* ───────────── step iterations ───────────── */ /** * Iterates one task step until it completes, needs feedback, or asks for redo. * Example: validation failure sets nextAction=redo-step and re-runs iteration. */ async runStepIterations(taskStep) { const MAX_ITERATIONS = 5; let loopCount = 0; const getNextIterationAction = () => { const nextAction = taskStep.result?.stepReasoning?.nextAction; if (!["continue", "redo-step", "expand-scope", "request-feedback", "complete"].includes(nextAction ?? "")) { return "continue"; } return nextAction; }; while (loopCount < MAX_ITERATIONS) { this.runCount++; loopCount++; if (taskStep.result?.stepReasoning) taskStep.result.stepReasoning.nextAction = undefined; await this.runWorkIteration(taskStep); const nextAction = getNextIterationAction(); this.logLine("STEP-LOOP", `nextAction = ${nextAction}`); if (nextAction === "complete") return "complete"; if (nextAction === "request-feedback") return "request-feedback"; if (nextAction === "redo-step") continue; } return "continue"; } /* ───────────── work iteration ───────────── */ /** * Executes one analyze/transform/validate pass for the current task step. * Example: generate analysis plan, run one transform step, then validate. 
*/ async runWorkIteration(taskStep) { if (!this.context.analysis) this.context.analysis = {}; if (taskStep.action?.startsWith("research-")) { await this.executeResearchTaskStep(taskStep); return; } if (this.canExecutePhase("analysis") && this.canExecuteScope("analysis")) { const tAnalysis = this.startTimer(); await analysisPlanGenStep.run(this.context); this.logLine("PLAN", "analysisPlanGen", tAnalysis(), undefined, { highlight: false }); const analysisPlan = this.context.analysis?.planSuggestion?.plan ?? { steps: [] }; for (const step of analysisPlan.steps) { const tStep = this.startTimer(); await this.executeStep(step, { query: this.query }); this.logLine("PLANNING-STEP", step.action || "unnamedStep", tStep()); } if (this.context.analysis) this.context.analysis.planSuggestion = undefined; } if (this.canExecutePhase("transform") && this.canExecuteScope("transform")) { const tTransform = this.startTimer(); await transformPlanGenStep.run(this.context); this.logLine("PLAN", "transformPlanGen", tTransform(), undefined, { highlight: false }); const transformPlan = this.context.analysis?.planSuggestion?.plan ?? 
{ steps: [] }; const firstStep = transformPlan.steps[0]; if (firstStep) { const tStep = this.startTimer(); await this.executeStep(firstStep, { query: this.query }); this.logLine("PLANNING-STEP", `#1 (only) - ${firstStep.action || "unnamedStep"}`, tStep()); } if (this.context.analysis) this.context.analysis.planSuggestion = undefined; if (this.canExecutePhase("write") && this.canExecuteScope("write")) { const tWrite = this.startTimer(); await writeFileStep.run({ query: this.query, context: this.context }); this.logLine("WRITE", "writeFileStep", tWrite()); } const tValidate = this.startTimer(); await validateChangesStep.run(this.context); this.logLine("VALIDATION", "validateChangesStep", tValidate()); } const tReason = this.startTimer(); await reasonNextStep.run(this.context, taskStep); this.logLine("REASONING", "reasonNextStep", tReason()); const tCollab = this.startTimer(); await collaboratorStep.run(this.context); this.logLine("FEEDBACK", "collaboratorStep", tCollab()); const tIntegrate = this.startTimer(); await integrateFeedbackStep.run(this.context); this.logLine("FEEDBACK", "integrateFeedbackStep", tIntegrate()); } /** * Executes deterministic research steps and marks them complete. * Example: research-impact-map summarizes affected files and seeds understanding notes. */ async executeResearchTaskStep(taskStep) { var _a, _b; const selectedFiles = this.context.analysis?.focus?.selectedFiles ?? []; const candidateFiles = this.context.analysis?.focus?.candidateFiles ?? []; const fileAnalysis = this.context.analysis?.fileAnalysis ?? 
{}; const researchTerms = this.buildResearchTerms(); const researchPaths = this.collectResearchPaths(24); const corpus = this.loadResearchCorpus(researchPaths, 12, 12000); const understanding = (_b = ((_a = this.context).analysis || (_a.analysis = {}))).understanding || (_b.understanding = { assumptions: [], constraints: [], risks: [], sharedPatterns: [], hotspots: [], couplingPoints: [], }); const addUnique = (arr, value) => { if (!arr) return; if (!arr.includes(value)) arr.push(value); }; let summary = ""; let collectedData = { selectedFiles: selectedFiles.slice(0, 12), selectedFileCount: selectedFiles.length, candidateFileCount: candidateFiles.length, researchTerms, corpusFilesRead: corpus.length, corpusPaths: corpus.map(f => f.path).slice(0, 12), }; switch (taskStep.action) { case "research-impact-map": { const touched = selectedFiles.length; const impactRows = corpus .map(file => { const termHits = this.computeTermHits(file.content, researchTerms); const termHitTotal = Object.values(termHits).reduce((acc, n) => acc + n, 0); const importCount = this.countRegex(file.content, /\bimport\b|\brequire\s*\(/g); const exportCount = this.countRegex(file.content, /\bexport\b|module\.exports/g); const score = termHitTotal * 3 + importCount * 2 + exportCount; return { filePath: file.path, score, termHits, importCount, exportCount, lineCount: file.lineCount, }; }) .sort((a, b) => b.score - a.score) .slice(0, 8); summary = `Impact map across ${touched} selected file(s).`; addUnique(understanding.constraints, `Refactor impact spans ${touched} file(s).`); collectedData = { ...collectedData, touchedFiles: selectedFiles.slice(0, 20), impactSignals: [ `selected=${selectedFiles.length}`, `candidates=${candidateFiles.length}`, ], impactMap: impactRows, }; break; } case "research-symbol-trace": { const structuralSymbols = Object.values(fileAnalysis) .flatMap(fa => fa.structural?.functions?.map(fn => fn.name).filter(Boolean) ?? 
[]) .slice(0, 24); const fallbackSymbols = corpus .flatMap(file => Array.from(file.content.matchAll(/\b(function|class|const|let|var)\s+([A-Za-z_]\w*)/g)).map(m => m[2])) .filter(Boolean); const symbolPool = Array.from(new Set([...structuralSymbols, ...fallbackSymbols])).slice(0, 18); const traceRows = symbolPool .map(symbol => { const escaped = symbol.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); const re = new RegExp(`\\b${escaped}\\b`, "g"); const files = corpus .map(file => ({ filePath: file.path, count: this.countRegex(file.content, re) })) .filter(item => item.count > 0); return { symbol, occurrenceCount: files.reduce((acc, f) => acc + f.count, 0), files: files.slice(0, 8), }; }) .filter(row => row.occurrenceCount > 0) .sort((a, b) => b.occurrenceCount - a.occurrenceCount) .slice(0, 10); summary = traceRows.length ? `Traced ${traceRows.length} symbol(s) from corpus.` : "No structural symbols found; symbol trace used filename-level anchors."; addUnique(understanding.assumptions, "Symbol trace coverage is partial and based on current selected files."); collectedData = { ...collectedData, tracedSymbols: traceRows.map(s => s.symbol), symbolTrace: traceRows, analyzedFileCount: Object.values(fileAnalysis).filter(fa => fa?.semanticAnalyzed).length, }; break; } case "research-risk-check": { const riskPatterns = [ { id: "empty-catch", description: "Empty catch blocks", pattern: /catch\s*\(\s*[^)]*\)\s*\{\s*\}/g }, { id: "console-error", description: "Console error logging", pattern: /\bconsole\.error\s*\(/g }, { id: "forced-exit", description: "Process exit usage", pattern: /\bprocess\.exit\s*\(/g }, { id: "throws-string", description: "Throwing non-Error values", pattern: /\bthrow\s+['"`]/g }, ]; const riskRows = riskPatterns .map(risk => { const perFile = corpus .map(file => ({ filePath: file.path, count: this.countRegex(file.content, risk.pattern) })) .filter(hit => hit.count > 0); return { id: risk.id, description: risk.description, totalHits: perFile.reduce((acc, hit) 
=> acc + hit.count, 0), files: perFile.slice(0, 8), }; }) .filter(risk => risk.totalHits > 0); summary = "Recorded baseline risks/assumptions/constraints before transformation."; addUnique(understanding.risks, "Cross-file regressions are possible without full symbol coverage."); addUnique(understanding.risks, "Validation should run after each transform step."); for (const risk of riskRows) { addUnique(understanding.risks, `${risk.description}: ${risk.totalHits} hit(s)`); } collectedData = { ...collectedData, risks: understanding.risks?.slice(0, 12) ?? [], assumptions: understanding.assumptions?.slice(0, 12) ?? [], constraints: understanding.constraints?.slice(0, 12) ?? [], riskSignals: riskRows, }; break; } case "research-architecture-synthesis": { const analyzedPaths = Object.entries(fileAnalysis) .filter(([_, fa]) => fa?.semanticAnalyzed) .map(([filePath]) => filePath); const architectureFiles = (analyzedPaths.length > 0 ? analyzedPaths : corpus.map(file => file.path)).slice(0, 8); understanding.problemStatement = `Summarize repository architecture and identify weak coupling points across ${selectedFiles.length} scoped file(s).`; for (const p of architectureFiles) { const base = path.basename(p); if (base.toLowerCase().includes("registry")) { addUnique(understanding.hotspots, `${base}: central registry point with broad module fan-in.`); addUnique(understanding.couplingPoints, `${base}: centralized module registration coupling.`); } if (base.toLowerCase().includes("module")) { addUnique(understanding.sharedPatterns, `${base}: module-oriented pipeline pattern.`); } } addUnique(understanding.sharedPatterns, "Pipeline modules follow a shared Module/ModuleIO contract."); addUnique(understanding.couplingPoints, "Shared config/model utilities create cross-module coupling."); addUnique(understanding.hotspots, "Core orchestration and registry layers are high-impact change zones."); summary = `Architecture synthesis completed from ${architectureFiles.length} analyzed 
file(s).`; const priorResearch = (this.context.task?.taskSteps ?? []) .filter(step => step.action?.startsWith("research-") && step.status === "completed") .map(step => ({ action: step.action, summary: step.result?.research?.summary, })); collectedData = { ...collectedData, architectureInputFiles: architectureFiles, priorResearchSummaries: priorResearch, problemStatement: understanding.problemStatement ?? "", sharedPatterns: understanding.sharedPatterns?.slice(0, 12) ?? [], hotspots: understanding.hotspots?.slice(0, 12) ?? [], couplingPoints: understanding.couplingPoints?.slice(0, 12) ?? [], }; break; } default: { summary = `Unknown research action: ${taskStep.action}`; collectedData = { ...collectedData, warning: "No handler for research action", }; break; } } const completedAt = new Date().toISOString(); const researchEntry = { action: taskStep.action, summary, collectedData, selectedFileCount: selectedFiles.length, completedAt, }; taskStep.result || (taskStep.result = {}); taskStep.result.research = researchEntry; taskStep.result.stepReasoning = { nextAction: "complete", rationale: `Research step completed: ${summary}`, confidence: 0.95, }; taskStep.status = "completed"; this.persistResearchArtifact(researchEntry); logInputOutput("runResearchStep", "output", { research: researchEntry, stepReasoning: taskStep.result.stepReasoning, status: taskStep.status, }); } /** * Persists normalized research outputs into analysis.researchArtifacts. * Example: latestByAction["research-risk-check"] stores current risk findings. 
*/ persistResearchArtifact(entry) { var _a, _b; (_a = this.context).analysis || (_a.analysis = {}); const store = (_b = this.context.analysis).researchArtifacts || (_b.researchArtifacts = { latestByAction: {}, history: [], touchedFiles: [], lastUpdatedAt: entry.completedAt, }); store.latestByAction || (store.latestByAction = {}); store.history || (store.history = []); store.touchedFiles || (store.touchedFiles = []); store.latestByAction[entry.action] = entry; store.history.push(entry); const data = entry.collectedData ?? {}; const touched = this.extractPathsFromResearchData(data); const merged = new Set([...(store.touchedFiles ?? []), ...touched]); store.touchedFiles = Array.from(merged); store.lastUpdatedAt = entry.completedAt; } /** * Extracts file paths from heterogeneous research payloads. * Example: impactMap rows and architectureInputFiles are both merged into touchedFiles. */ extractPathsFromResearchData(data) { const paths = new Set(); const addPath = (value) => { if (typeof value === "string" && value.trim().length > 0) { paths.add(value); } }; const addPathArray = (value) => { if (!Array.isArray(value)) return; for (const item of value) { addPath(item); } }; addPathArray(data.corpusPaths); addPathArray(data.touchedFiles); addPathArray(data.architectureInputFiles); if (Array.isArray(data.impactMap)) { for (const row of data.impactMap) { addPath(row.filePath); } } if (Array.isArray(data.symbolTrace)) { for (const row of data.symbolTrace) { const files = row.files; if (!Array.isArray(files)) continue; for (const fileRow of files) { addPath(fileRow.filePath); } } } if (Array.isArray(data.riskSignals)) { for (const row of data.riskSignals) { const files = row.files; if (!Array.isArray(files)) continue; for (const fileRow of files) { addPath(fileRow.filePath); } } } return Array.from(paths); } /** * Builds lightweight query terms for deterministic research scanning. * Example: "error handling test suite" -> ["error","handling","test","suite"]. 
*/ buildResearchTerms() { const query = this.context.analysis?.intent?.normalizedQuery ?? this.context.initContext?.userQuery ?? this.query; const stopWords = new Set([ "the", "and", "for", "with", "from", "this", "that", "what", "how", "is", "are", "was", "were", "can", "could", "should", "would", "into", "about", "across", "repo", "codebase", "please", ]); return Array.from(new Set(query .toLowerCase() .split(/[^a-z0-9_]+/g) .filter(token => token.length >= 3 && !stopWords.has(token)))).slice(0, 10); } /** * Collects research candidate paths from selected, candidate, related, and working files. * Example: selected files are prioritized before broader related file pool. */ collectResearchPaths(maxPaths) { const focus = this.context.analysis?.focus; const workingPaths = (this.context.workingFiles ?? []).map(file => file.path); const related = this.context.initContext?.relatedFiles ?? []; const combined = [ ...(focus?.selectedFiles ?? []), ...(focus?.candidateFiles ?? []), ...workingPaths, ...related, ]; const unique = Array.from(new Set(combined)); return unique .filter(filePath => !filePath.startsWith("__research__/") && fs.existsSync(filePath)) .slice(0, maxPaths); } /** * Reads a bounded corpus from candidate paths. * Example: read first 12 files, max 12k chars per file, skipping binary payloads. */ loadResearchCorpus(filePaths, maxFiles, maxCharsPerFile) { const corpus = []; for (const filePath of filePaths.slice(0, maxFiles)) { try { const raw = fs.readFileSync(filePath, "utf-8"); if (raw.includes("\u0000")) continue; const content = raw.slice(0, maxCharsPerFile); corpus.push({ path: filePath, content, lineCount: content.split("\n").length, charCount: content.length, }); } catch { // Ignore unreadable files and continue. } } return corpus; } /** * Counts regex matches safely. * Example: countRegex(code, /import/g) -> number of import occurrences. */ countRegex(content, pattern) { const source = pattern.source; const flags = pattern.flags.includes("g") ? 
pattern.flags : `${pattern.flags}g`; const re = new RegExp(source, flags); return Array.from(content.matchAll(re)).length; } /** * Computes per-term match counts for a file body. * Example: terms ["error","test"] -> { error: 4, test: 2 }. */ computeTermHits(content, terms) { const hits = {}; for (const term of terms) { const escaped = term.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); const count = this.countRegex(content, new RegExp(`\\b${escaped}\\b`, "gi")); if (count > 0) { hits[term] = count; } } return hits; } /* ───────────── step executor ───────────── */ /** * Executes a single step using its corresponding module. * * @param step - The step to execute. * @param input - The input data for the step. * @returns A promise resolving to the output of the step. * @throws If the module is not found or execution fails. */ async executeStep(step, input) { const stop = this.startTimer(); this.context.currentStep = step; const mod = resolveModuleForAction(step.action); if (!mod) { this.logLine("EXECUTE", step.action, stop(), "skipped (missing module)"); return { query: input.query, content: input.content, data: { skipped: true } }; } try { this.ui.update(`Running step: ${step.action}`); const output = await mod.run({ query: step.description ?? input.query, content: input.data ?? input.content, context: this.context }); const errors = Array.isArray(output.data?.errors) ? output.data.errors.filter((e) => typeof e === "string" && e.trim().length > 0) : []; if (errors.length > 0) { const detail = errors.slice(0, 2).join(" | "); this.logLine("EXECUTE", step.action, stop(), `completed with errors: ${detail}`); console.error(`[${step.action}] ${errors.join(" | ")}`); } return output; } catch (err) { this.logLine("EXECUTE", step.action, stop(), "failed"); throw err; } } /* ───────────── extracted from runSearch ───────────── */ resolveInitialRetrievalQueries() { const rawUserQuery = this.context.initContext?.userQuery ?? 
this.query; const retrievalQuery = this.context.analysis?.intent?.normalizedQuery?.trim() || rawUserQuery; return { rawUserQuery, retrievalQuery }; } async fetchInitialRetrievalResults(retrievalQuery) { return semanticSearchFiles(retrievalQuery, RELATED_FILES_LIMIT, this.context.analysis?.intent ?? {}); } mapSearchResultToTopFile(result) { return { id: result.id, path: result.path, summary: result.summary ?? undefined, bm25Score: result.bm25Score, }; } mapSearchResultToRelatedFile(result) { return { id: result.id, path: result.path, summary: result.summary ?? undefined, bm25Score: result.bm25Score, }; } buildInitialRetrievalPromptArgs(results, retrievalQuery) { const topFiles = results .slice(0, NUM_TOPFILES) .map(result => this.mapSearchResultToTopFile(result)); const relatedFiles = results .slice(NUM_TOPFILES) .map(result => this.mapSearchResultToRelatedFile(result)); const queryExpansionTerms = results.find(result => Array.isArray(result.queryExpansionTerms))?.queryExpansionTerms; return { topFiles, relatedFiles, query: retrievalQuery, queryExpansionTerms, }; } mergeSeededInitialContext(rawUserQuery, seededContext) { // Merge retrieval seed into initContext without losing previously discovered files. // Example: keep old relatedFiles and append newly seeded files from buildLightContext. const existingInit = this.context.initContext ?? { userQuery: rawUserQuery }; const seededInit = seededContext.initContext; const mergedRelatedFiles = Array.from(new Set([ ...(existingInit.relatedFiles ?? []), ...(seededInit?.relatedFiles ?? []), ])); const mergedScores = { ...(existingInit.relatedFileScores ?? {}), ...(seededInit?.relatedFileScores ?? {}), }; const mergedQueryExpansionTerms = Array.from(new Set([ ...(existingInit.queryExpansionTerms ?? []), ...(seededInit?.queryExpansionTerms ?? []), ])); this.context.initContext = { ...existingInit, ...(seededInit ?? 
{}), userQuery: rawUserQuery, relatedFiles: mergedRelatedFiles, relatedFileScores: mergedScores, queryExpansionTerms: mergedQueryExpansionTerms, folderCapsules: (seededInit?.folderCapsules?.length ? seededInit.folderCapsules : existingInit.folderCapsules) ?? [], }; return mergedRelatedFiles.length; } applyDeterministicPreGroundingPrefilter(retrievalQuery) { // Rank and cap retrieval candidates before grounding to reduce noisy evidence passes. // Example: explicit filename anchors are always kept even if BM25 score is low. const init = this.context.