scai
Version:
> **A local-first AI CLI for understanding, querying, and iterating on large codebases.** > **100% local • No token costs • No cloud • No prompt injection • Private by design**
1,047 lines • 78 kB
JavaScript
import { builtInModules } from "../pipeline/registry/moduleRegistry.js";
import { logInputOutput } from "../utils/promptLogHelper.js";
import { infoPlanGenStep } from "./infoPlanGenStep.js";
import { understandIntentStep } from "./understandIntentStep.js";
import { transformPlanGenStep } from "./transformPlanGenStep.js";
import { getDbForRepo } from "../db/client.js";
import { writeFileStep } from "./writeFileStep.js";
import { resolveExecutionModeStep } from "./resolveExecutionModeStep.js";
import { fileCheckStep } from "./fileCheckStep.js";
import { analysisPlanGenStep } from "./analysisPlanGenStep.js";
import { readinessGateStep } from "./readinessGateStep.js";
import { scopeClassificationStep } from "./scopeClassificationStep.js";
import { routingDecisionStep } from "./routingDecisionStep.js";
import { evidenceVerifierStep } from "./evidenceVerifierStep.js";
import { validateChangesStep } from './validateChangesStep.js';
import { reasonNextTaskStep } from './reasonNextTaskStep.js';
import { collaboratorStep } from './collaboratorStep.js';
import { integrateFeedbackStep } from './integrateFeedbackStep.js';
import { researchPlanGenStep } from "./researchPlanGenStep.js";
import { selectRelevantSourcesStep } from "./selectRelevantSourcesStep.js";
import { iterationFileSelector } from "./iterationFileSelector.js";
import { finalAnswerModule } from "../pipeline/modules/finalAnswerModule.js";
import { reasonNextStep } from "./reasonNextStep.js";
import { buildLightContext } from "../utils/buildContextualPrompt.js";
import { semanticSearchFiles } from "../db/fileIndex.js";
import { NUM_TOPFILES, RELATED_FILES_LIMIT } from "../constants.js";
import { structuralPreloadStep } from "./structuralPreloadStep.js";
import { extractFileReferences } from "../utils/extractFileReferences.js";
import { PREFILTER_STOP_WORDS } from "../fileRules/stopWords.js";
import { MAX_WELL_KNOWN_REPO_FILES, WELL_KNOWN_REPO_FILE_BASENAMES } from "../fileRules/wellKnownRepoFiles.js";
import chalk from "chalk";
import path from "path";
import fs from "fs";
/* ───────────────────────── registry ───────────────────────── */
// File-private shallow copy of the built-in module table (own enumerable
// string keys only), so lookups never go through the imported object directly.
const MODULE_REGISTRY = Object.fromEntries(Object.entries(builtInModules));
/**
 * Looks up the pipeline module registered for an action name.
 *
 * Only the registry's own keys are consulted, so an action that happens to
 * match an Object.prototype member (e.g. "constructor", "toString") resolves
 * to undefined instead of an inherited function.
 *
 * @param action - Action name to resolve.
 * @returns The registered module, or undefined when none is registered.
 */
function resolveModuleForAction(action) {
    if (!Object.prototype.hasOwnProperty.call(MODULE_REGISTRY, action)) {
        return undefined;
    }
    return MODULE_REGISTRY[action];
}
/* ───────────────────────── agent ───────────────────────── */
/**
* MainAgent is the core orchestrator of the SCAI agent system.
* It manages the execution flow, coordinates steps, and handles context management.
*
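* The agent follows a multi-phase execution model (see run()):
* 1. Boot: Determine intent and boot the task record
* 2. Scope: Classify scope, resolve the execution mode, and decide routing (which actions are allowed)
* 3. Search: Seed initial candidate files via retrieval and a deterministic prefilter
* 4. Verify: Grounding & readiness loop — acquire evidence, check files, and verify readiness
* 5. Research: Seed explicit research task steps when the route allows research
* 6. Plan: Seed ordered execution task steps (only when the research gate is satisfied)
* 7. Work loop: Analyze, transform, write, and validate per task step
* 8. Finalize: Produce the final answer and persist task data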
*/
export class MainAgent {
/**
* Creates a new MainAgent instance.
*
* @param context - The structured context for the agent.
* @param ui - The UI interface for agent communication.
*/
constructor(context, ui) {
this.runCount = 0;
this.context = context;
this.query = context.initContext?.userQuery ?? "";
this.ui = ui;
}
/* ───────────── main run ───────────── */
async run() {
try {
this.runCount = 0;
await this.runBoot();
await this.runScope();
await this.runSearch();
await this.runVerify();
await this.runResearch();
const canProceedToExecution = this.isResearchGateSatisfied();
if (canProceedToExecution) {
await this.runPlan();
await this.runWorkLoop();
}
await this.runFinalize();
}
finally {
this.ui.stop(); // ← guaranteed cleanup
}
}
/* ───────────── boot ───────────── */
async runBoot() {
var _a;
await understandIntentStep.run({ context: this.context });
// Boot the task and get the real DB taskId
this.taskId = bootTaskForRepo(this.context, getDbForRepo(), (phase, step, ms, desc) => this.logLine(phase, step, ms, desc, { highlight: true }));
(_a = this.context).task || (_a.task = {
id: this.taskId,
projectId: 0,
status: "active",
initialQuery: this.context.initContext?.userQuery ?? "",
createdAt: new Date().toISOString(),
updatedAt: new Date().toISOString(),
taskSteps: [],
});
this.context.task.id = this.taskId;
this.logLine("TASK", "Boot complete", undefined, `taskId=${this.taskId}`, { highlight: true });
}
/* ───────────── scope ───────────── */
async runScope() {
await scopeClassificationStep.run(this.context);
await resolveExecutionModeStep.run(this.context);
await routingDecisionStep.run(this.context);
const routing = this.context.analysis?.routingDecision;
if (routing) {
this.logLine("TASK", "Routing decision", undefined, `${routing.decision} | search=${routing.allowSearch} | research=${routing.allowResearch} | transform=${routing.allowTransform} | scopeLocked=${routing.scopeLocked}`);
}
this.logLine("TASK", "Scope classification complete");
}
/* ───────────── search ───────────── */
/**
* Seeds initial candidate files using semantic retrieval + deterministic prefilter.
* Example: query mentions "MainAgent" -> relatedFiles are narrowed before grounding.
*/
async runSearch() {
const { rawUserQuery, retrievalQuery } = this.resolveInitialRetrievalQueries();
const t = this.startTimer();
try {
const results = await this.fetchInitialRetrievalResults(retrievalQuery);
const promptArgs = this.buildInitialRetrievalPromptArgs(results, retrievalQuery);
const seededContext = await buildLightContext(promptArgs);
const mergedRelatedCount = this.mergeSeededInitialContext(rawUserQuery, seededContext);
const prefilter = this.applyDeterministicPreGroundingPrefilter(retrievalQuery);
const repoDefaults = this.injectWellKnownRepoFiles(prefilter.after);
this.logLine("ANALYSIS", "initialRetrieval", t(), `${results.length} result(s), ${mergedRelatedCount} candidate file(s), prefilter ${prefilter.before} -> ${prefilter.after}, defaults +${repoDefaults.added} (${repoDefaults.reason})`);
}
catch (err) {
this.logLine("ANALYSIS", "initialRetrieval", t(), `failed: ${String(err)}`);
}
}
/* ───────────── verify ───────────── */
/**
* Wave-based verify loop (evidence -> readiness -> optional info acquisition).
* Example: if readiness stays not-ready, run an info plan and try another wave.
*/
async runVerify() {
let ready = false;
const maxGroundingWaves = this.getGroundingWaveBudget();
let groundingWave = 0;
let stagnantWaves = 0;
const MAX_STAGNANT_WAVES = 2;
while (groundingWave < maxGroundingWaves) {
groundingWave++;
this.pruneMissingVerifyPaths();
this.logLine("ANALYSIS", "groundingWave", undefined, `wave ${groundingWave}/${maxGroundingWaves}`);
const beforeFocus = this.captureVerifyFocusSnapshot();
// ---------------- EVIDENCE PIPELINE ----------------
// -------- STRUCTURAL PRELOAD --------
const t0 = this.startTimer();
await structuralPreloadStep.run({ query: this.query, context: this.context });
this.logLine("ANALYSIS", "structuralPreload", t0());
const t1 = this.startTimer();
await evidenceVerifierStep.run({ query: this.query, context: this.context });
this.logLine("ANALYSIS", "collectAnalysisEvidence", t1());
const t2 = this.startTimer();
await fileCheckStep(this.context);
this.logLine("ANALYSIS", "fileCheckStep", t2());
const t3 = this.startTimer();
await selectRelevantSourcesStep.run({ query: this.query, context: this.context });
this.logLine("ANALYSIS", "selectRelevantSources", t3());
// ---------------- READINESS GATE ----------------
const t4 = this.startTimer();
await readinessGateStep.run(this.context);
this.logLine("ANALYSIS", "readinessGate", t4());
ready = this.context.analysis?.readiness?.decision === "ready";
if (ready) {
break;
}
// ---------------- INFORMATION ACQUISITION ----------------
const canRouteSearchExpansion = this.canExecuteRoute("search-expand");
if (!canRouteSearchExpansion) {
this.logLine("PLAN", "infoPlanGen", undefined, "skipped (routing disallows search expansion)", { highlight: false });
}
else if (this.canExecutePhase("planning") &&
this.canExecuteScope("planning")) {
const t = this.startTimer();
await infoPlanGenStep.run(this.context);
const infoPlan = this.context.analysis?.planSuggestion?.plan ?? { steps: [] };
for (const step of infoPlan.steps) {
const stepIO = { query: this.query };
await this.executeStep(step, stepIO);
}
this.logLine("PLAN", "infoPlanGen", t(), undefined, { highlight: false });
}
const afterFocus = this.captureVerifyFocusSnapshot();
const hasFocusGrowth = this.logVerifyFocusDelta(beforeFocus, afterFocus);
stagnantWaves = hasFocusGrowth ? 0 : stagnantWaves + 1;
if (this.shouldStopVerifyForSaturation(stagnantWaves, MAX_STAGNANT_WAVES))
break;
this.logLine("HASINFO", "Not ready — looping back to evidence collection", undefined, undefined, { highlight: false });
}
// Grounding is the phase boundary that decides whether execution may start.
if (!this.isWorkLoopReady())
return;
this.ensureTaskForWorkLoop();
this.recalibrateRoutingAfterVerify();
// Research gate is evaluated after runResearch() in run().
}
/* ───────────── research ───────────── */
/**
* Seeds explicit research task steps for complex repo-wide lanes.
* Example: enqueue research-impact-map, research-symbol-trace, and research-risk-check.
*/
async runResearch() {
var _a, _b;
if (!this.canExecuteRoute("research")) {
this.logLine("RESEARCH", "taskStepSeed", undefined, "skipped (route disallows research)");
return;
}
if (!this.context.task)
return;
(_a = this.context.task).taskSteps || (_a.taskSteps = []);
await researchPlanGenStep.run(this.context);
const generatedSteps = (this.context.analysis?.planSuggestion?.plan?.steps ?? [])
.filter(step => typeof step.action === "string" && step.action.startsWith("research-"))
.map(step => {
const action = step.action;
const defaultFilePath = action === "research-impact-map"
? "__research__/impact-map"
: action === "research-symbol-trace"
? "__research__/symbol-trace"
: action === "research-risk-check"
? "__research__/risk-check"
: "__research__/architecture-synthesis";
return {
action,
filePath: step.targetFile || defaultFilePath,
notes: step.description || `Run ${step.action}`,
};
});
const fallbackResearchSteps = [
{
action: "research-impact-map",
filePath: "__research__/impact-map",
notes: "Map cross-file impact before code changes.",
},
{
action: "research-symbol-trace",
filePath: "__research__/symbol-trace",
notes: "Trace key symbols across related files.",
},
{
action: "research-risk-check",
filePath: "__research__/risk-check",
notes: "Record risks, assumptions, and constraints before edits.",
},
{
action: "research-architecture-synthesis",
filePath: "__research__/architecture-synthesis",
notes: "Synthesize architecture summary, shared patterns, hotspots, and coupling points.",
},
];
const researchSteps = generatedSteps.length > 0 ? generatedSteps : fallbackResearchSteps;
let seededCount = 0;
for (const step of researchSteps) {
const exists = this.context.task.taskSteps.some(s => s.filePath === step.filePath && s.action === step.action);
if (exists)
continue;
this.context.task.taskSteps.push({
taskId: this.context.task.id,
filePath: step.filePath,
action: step.action,
status: "pending",
notes: step.notes,
result: { phase: "research", seededBy: "runResearch" },
});
seededCount++;
}
const plannedResearchSteps = this.context.task.taskSteps
.filter(s => typeof s.action === "string" && s.action.startsWith("research-"))
.map(s => ({
action: s.action,
filePath: s.filePath,
status: s.status,
notes: s.notes,
}));
logInputOutput("runResearch", "output", {
source: generatedSteps.length > 0 ? "generated" : "fallback",
seededCount,
totalResearchSteps: plannedResearchSteps.length,
steps: plannedResearchSteps,
});
(_b = this.context).analysis || (_b.analysis = {});
this.context.analysis.planSuggestion = undefined;
this.logLine("RESEARCH", "taskStepSeed", undefined, `${seededCount} research step(s) added (${generatedSteps.length > 0 ? "generated" : "fallback"})`);
}
/* ───────────── plan ───────────── */
/**
* Seeds ordered execution task steps from selected files + research/verify artifacts.
* Example: prioritize files that are both selected and research-touched.
*/
async runPlan() {
var _a, _b;
if (!this.context.task)
return;
if (!this.canExecutePhase("planning") || !this.canExecuteScope("planning"))
return;
(_a = this.context).analysis || (_a.analysis = {});
(_b = this.context.task).taskSteps || (_b.taskSteps = []);
const existingExecutionPaths = new Set(this.context.task.taskSteps
.filter(step => !!step.filePath &&
!step.filePath.startsWith("__research__/"))
.map(step => step.filePath));
const selectedFiles = this.context.analysis.focus?.selectedFiles ?? [];
const touchedFromResearch = this.context.analysis.researchArtifacts?.touchedFiles ?? [];
const route = this.context.analysis.routingDecision;
const useFocusedSelectedPlanOnly = this.context.analysis.readiness?.decision === "ready" &&
(route?.decision === "has-info") &&
(route?.scopeLocked ?? false) &&
(route?.allowSearch === false) &&
selectedFiles.length > 0;
const verifyMinConfidence = this.getVerifyConfidenceThresholdForPlan();
const verifyEntries = Object.entries(this.context.analysis.verify?.byFile ?? {});
const verifyRelevantFiles = verifyEntries
.filter(([_, verify]) => verify?.isRelevant &&
(verify.fileConfidence ?? 0) >= verifyMinConfidence)
.map(([filePath]) => filePath);
const verifySkippedLowConfidenceCount = verifyEntries.filter(([_, verify]) => !!verify?.isRelevant &&
(verify.fileConfidence ?? 0) < verifyMinConfidence).length;
const rankPath = (filePath) => {
const inSelected = selectedFiles.includes(filePath);
const inResearchTouched = touchedFromResearch.includes(filePath);
const inVerify = verifyRelevantFiles.includes(filePath);
if (inSelected && inResearchTouched)
return 0;
if (inSelected)
return 1;
if (inResearchTouched)
return 2;
if (inVerify)
return 3;
return 4;
};
const plannedPathsSource = useFocusedSelectedPlanOnly
? Array.from(new Set(selectedFiles))
: Array.from(new Set([
...selectedFiles,
...touchedFromResearch,
...verifyRelevantFiles,
]));
const plannedPaths = plannedPathsSource
.filter(filePath => !!filePath && !filePath.startsWith("__research__/") && fs.existsSync(filePath))
.sort((a, b) => rankPath(a) - rankPath(b))
.slice(0, 16);
let seededCount = 0;
const seeded = [];
for (const filePath of plannedPaths) {
if (existingExecutionPaths.has(filePath))
continue;
const rank = rankPath(filePath);
const notes = rank === 0
? "Plan priority: selected + research-touched"
: rank === 1
? "Plan priority: selected file"
: rank === 2
? "Plan priority: research-touched file"
: "Plan priority: verify-relevant file";
this.context.task.taskSteps.push({
taskId: this.context.task.id,
filePath,
status: "pending",
notes,
result: {
phase: "plan",
seededBy: "runPlan",
priorityRank: rank,
},
});
seeded.push({ filePath, rank, notes });
seededCount++;
}
logInputOutput("runPlan", "output", {
seededCount,
totalPlannedPaths: plannedPaths.length,
selectedFileCount: selectedFiles.length,
researchTouchedCount: touchedFromResearch.length,
verifyRelevantCount: verifyRelevantFiles.length,
focusedSelectedOnly: useFocusedSelectedPlanOnly,
verifyMinConfidence,
verifySkippedLowConfidenceCount,
seeded,
});
this.logLine("PLAN", "taskStepSeed", undefined, `${seededCount} execution step(s) planned`);
}
/**
* Sets minimum verify confidence before a file can be plan-seeded from verify-only signal.
* Example: single-file lanes require higher confidence than repo-wide lanes.
*/
getVerifyConfidenceThresholdForPlan() {
const scope = this.context.analysis?.scopeType ?? "repo-wide";
if (scope === "single-file")
return 0.45;
if (scope === "multi-file")
return 0.35;
return 0.3;
}
/**
* Re-routes after verify when evidence converges on selected files with high confidence.
* Example: selected files strongly verified => disable expansion/research and lock focused execution.
*/
recalibrateRoutingAfterVerify() {
var _a;
(_a = this.context).analysis || (_a.analysis = {});
const routing = this.context.analysis.routingDecision;
if (!routing)
return;
const selectedFiles = this.context.analysis.focus?.selectedFiles ?? [];
if (selectedFiles.length === 0)
return;
const readinessConfidence = this.context.analysis.readiness?.confidence ?? 0;
const intentConfidence = this.context.analysis.intent?.confidence ?? 0;
const minFileConfidence = 0.28;
const strongSelected = selectedFiles.filter(filePath => {
const verify = this.context.analysis?.verify?.byFile?.[filePath];
return verify?.isRelevant === true && (verify.fileConfidence ?? 0) >= minFileConfidence;
});
const convergedSingle = selectedFiles.length === 1 &&
strongSelected.length === 1 &&
readinessConfidence >= 0.9 &&
intentConfidence >= 0.8;
const convergedMulti = selectedFiles.length >= 2 &&
strongSelected.length >= 2 &&
readinessConfidence >= 0.9 &&
intentConfidence >= 0.75;
if (!convergedSingle && !convergedMulti)
return;
routing.decision = "has-info";
routing.allowSearch = false;
routing.allowResearch = false;
routing.scopeLocked = true;
routing.rationale = `${routing.rationale}; postVerify=focused-selection(${strongSelected.length})`;
this.logLine("TASK", "Routing recalibrated", undefined, `focused=${selectedFiles.length} selected, strong=${strongSelected.length}`);
}
/* ───────────── work loop ───────────── */
async runWorkLoop() {
if (this.context.task.status !== "active")
return;
this.ensureTaskForWorkLoop();
const MAX_TASK_STEPS = this.getTaskStepBudget();
let stepCount = 0;
while (stepCount < MAX_TASK_STEPS &&
this.context.task.status === "active") {
const nextAction = await this.resolveNextTaskAction();
if (nextAction === "request-feedback") {
this.persistTaskStatus("paused");
this.logLine("TASK", "Execution paused — awaiting user clarification", undefined, undefined, { highlight: false });
return;
}
if (nextAction === "complete") {
this.persistTaskStatus("completed");
this.logLine("TASK", "All selected files processed — task complete", undefined, undefined, { highlight: false });
return;
}
const taskStep = await iterationFileSelector.run(this.context);
if (!taskStep) {
this.persistTaskStatus("completed");
this.logLine("TASK", "No eligible taskStep found — task complete", undefined, undefined, { highlight: false });
return;
}
stepCount++;
this.startTaskStep(taskStep, stepCount);
// ---------------------------
// Step-level iterations
// ---------------------------
const stepAction = await this.runStepIterations(taskStep);
this.finishTaskStep(taskStep, stepCount, stepAction);
}
this.logLine("TASK", "Max task step limit reached — stopping work loop", undefined, undefined, { highlight: false });
}
/* ───────────── finalize ───────────── */
async runFinalize() {
await finalAnswerModule.run({ query: this.query, context: this.context });
persistTaskData(this.context, this.taskId, getDbForRepo(), this.logLine.bind(this));
this.logLine("TASK", "Finalize complete", undefined, undefined, { highlight: false });
}
/* ───────────── step iterations ───────────── */
/**
* Iterates one task step until it completes, needs feedback, or asks for redo.
* Example: validation failure sets nextAction=redo-step and re-runs iteration.
*/
async runStepIterations(taskStep) {
const MAX_ITERATIONS = 5;
let loopCount = 0;
const getNextIterationAction = () => {
const nextAction = taskStep.result?.stepReasoning?.nextAction;
if (!["continue", "redo-step", "expand-scope", "request-feedback", "complete"].includes(nextAction ?? "")) {
return "continue";
}
return nextAction;
};
while (loopCount < MAX_ITERATIONS) {
this.runCount++;
loopCount++;
if (taskStep.result?.stepReasoning)
taskStep.result.stepReasoning.nextAction = undefined;
await this.runWorkIteration(taskStep);
const nextAction = getNextIterationAction();
this.logLine("STEP-LOOP", `nextAction = ${nextAction}`);
if (nextAction === "complete")
return "complete";
if (nextAction === "request-feedback")
return "request-feedback";
if (nextAction === "redo-step")
continue;
}
return "continue";
}
/* ───────────── work iteration ───────────── */
/**
* Executes one analyze/transform/validate pass for the current task step.
* Example: generate analysis plan, run one transform step, then validate.
*/
async runWorkIteration(taskStep) {
if (!this.context.analysis)
this.context.analysis = {};
if (taskStep.action?.startsWith("research-")) {
await this.executeResearchTaskStep(taskStep);
return;
}
if (this.canExecutePhase("analysis") && this.canExecuteScope("analysis")) {
const tAnalysis = this.startTimer();
await analysisPlanGenStep.run(this.context);
this.logLine("PLAN", "analysisPlanGen", tAnalysis(), undefined, { highlight: false });
const analysisPlan = this.context.analysis?.planSuggestion?.plan ?? { steps: [] };
for (const step of analysisPlan.steps) {
const tStep = this.startTimer();
await this.executeStep(step, { query: this.query });
this.logLine("PLANNING-STEP", step.action || "unnamedStep", tStep());
}
if (this.context.analysis)
this.context.analysis.planSuggestion = undefined;
}
if (this.canExecutePhase("transform") && this.canExecuteScope("transform")) {
const tTransform = this.startTimer();
await transformPlanGenStep.run(this.context);
this.logLine("PLAN", "transformPlanGen", tTransform(), undefined, { highlight: false });
const transformPlan = this.context.analysis?.planSuggestion?.plan ?? { steps: [] };
const firstStep = transformPlan.steps[0];
if (firstStep) {
const tStep = this.startTimer();
await this.executeStep(firstStep, { query: this.query });
this.logLine("PLANNING-STEP", `#1 (only) - ${firstStep.action || "unnamedStep"}`, tStep());
}
if (this.context.analysis)
this.context.analysis.planSuggestion = undefined;
if (this.canExecutePhase("write") && this.canExecuteScope("write")) {
const tWrite = this.startTimer();
await writeFileStep.run({ query: this.query, context: this.context });
this.logLine("WRITE", "writeFileStep", tWrite());
}
const tValidate = this.startTimer();
await validateChangesStep.run(this.context);
this.logLine("VALIDATION", "validateChangesStep", tValidate());
}
const tReason = this.startTimer();
await reasonNextStep.run(this.context, taskStep);
this.logLine("REASONING", "reasonNextStep", tReason());
const tCollab = this.startTimer();
await collaboratorStep.run(this.context);
this.logLine("FEEDBACK", "collaboratorStep", tCollab());
const tIntegrate = this.startTimer();
await integrateFeedbackStep.run(this.context);
this.logLine("FEEDBACK", "integrateFeedbackStep", tIntegrate());
}
/**
* Executes deterministic research steps and marks them complete.
* Example: research-impact-map summarizes affected files and seeds understanding notes.
*/
async executeResearchTaskStep(taskStep) {
// Runs the handler selected by taskStep.action, stores the outcome on the task
// step and in analysis.researchArtifacts, and marks the step as completed.
var _a, _b;
const selectedFiles = this.context.analysis?.focus?.selectedFiles ?? [];
const candidateFiles = this.context.analysis?.focus?.candidateFiles ?? [];
const fileAnalysis = this.context.analysis?.fileAnalysis ?? {};
const researchTerms = this.buildResearchTerms();
// Collect up to 24 candidate paths, then load the corpus with limits 12 and
// 12000 (loadResearchCorpus's maxFiles and maxCharsPerFile parameters).
const researchPaths = this.collectResearchPaths(24);
const corpus = this.loadResearchCorpus(researchPaths, 12, 12000);
// Ensure context.analysis and analysis.understanding exist. The handlers below
// append to the understanding arrays in place.
const understanding = (_b = ((_a = this.context).analysis || (_a.analysis = {}))).understanding || (_b.understanding = {
assumptions: [],
constraints: [],
risks: [],
sharedPatterns: [],
hotspots: [],
couplingPoints: [],
});
// Appends value only when it is not already present; no-op when arr is missing.
const addUnique = (arr, value) => {
if (!arr)
return;
if (!arr.includes(value))
arr.push(value);
};
let summary = "";
// Baseline payload shared by all handlers; each case spreads it and adds its own fields.
let collectedData = {
selectedFiles: selectedFiles.slice(0, 12),
selectedFileCount: selectedFiles.length,
candidateFileCount: candidateFiles.length,
researchTerms,
corpusFilesRead: corpus.length,
corpusPaths: corpus.map(f => f.path).slice(0, 12),
};
switch (taskStep.action) {
case "research-impact-map": {
const touched = selectedFiles.length;
// Score each corpus file: term hits x3 + import/require count x2 + export count.
// Only the 8 highest-scoring rows are kept.
const impactRows = corpus
.map(file => {
const termHits = this.computeTermHits(file.content, researchTerms);
const termHitTotal = Object.values(termHits).reduce((acc, n) => acc + n, 0);
const importCount = this.countRegex(file.content, /\bimport\b|\brequire\s*\(/g);
const exportCount = this.countRegex(file.content, /\bexport\b|module\.exports/g);
const score = termHitTotal * 3 + importCount * 2 + exportCount;
return {
filePath: file.path,
score,
termHits,
importCount,
exportCount,
lineCount: file.lineCount,
};
})
.sort((a, b) => b.score - a.score)
.slice(0, 8);
summary = `Impact map across ${touched} selected file(s).`;
addUnique(understanding.constraints, `Refactor impact spans ${touched} file(s).`);
collectedData = {
...collectedData,
touchedFiles: selectedFiles.slice(0, 20),
impactSignals: [
`selected=${selectedFiles.length}`,
`candidates=${candidateFiles.length}`,
],
impactMap: impactRows,
};
break;
}
case "research-symbol-trace": {
// Function names from structural analysis (first 24 across all analyzed files).
// NOTE(review): `fa` is dereferenced without a null guard here, unlike the
// `fa?.semanticAnalyzed` checks elsewhere in this method — confirm entries are never null.
const structuralSymbols = Object.values(fileAnalysis)
.flatMap(fa => fa.structural?.functions?.map(fn => fn.name).filter(Boolean) ?? [])
.slice(0, 24);
// Identifiers declared via function/class/const/let/var in the corpus text.
const fallbackSymbols = corpus
.flatMap(file => Array.from(file.content.matchAll(/\b(function|class|const|let|var)\s+([A-Za-z_]\w*)/g)).map(m => m[2]))
.filter(Boolean);
// Structural names come first; the de-duplicated pool is capped at 18 symbols.
const symbolPool = Array.from(new Set([...structuralSymbols, ...fallbackSymbols])).slice(0, 18);
const traceRows = symbolPool
.map(symbol => {
// Escape regex metacharacters so the symbol is matched literally between word boundaries.
const escaped = symbol.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
const re = new RegExp(`\\b${escaped}\\b`, "g");
const files = corpus
.map(file => ({ filePath: file.path, count: this.countRegex(file.content, re) }))
.filter(item => item.count > 0);
return {
symbol,
occurrenceCount: files.reduce((acc, f) => acc + f.count, 0),
files: files.slice(0, 8),
};
})
.filter(row => row.occurrenceCount > 0)
.sort((a, b) => b.occurrenceCount - a.occurrenceCount)
.slice(0, 10);
// NOTE(review): the empty-result summary mentions "filename-level anchors", but no
// filename-based tracing is performed in this block — confirm the wording is intended.
summary = traceRows.length
? `Traced ${traceRows.length} symbol(s) from corpus.`
: "No structural symbols found; symbol trace used filename-level anchors.";
addUnique(understanding.assumptions, "Symbol trace coverage is partial and based on current selected files.");
collectedData = {
...collectedData,
tracedSymbols: traceRows.map(s => s.symbol),
symbolTrace: traceRows,
analyzedFileCount: Object.values(fileAnalysis).filter(fa => fa?.semanticAnalyzed).length,
};
break;
}
case "research-risk-check": {
// Regex-based risk signals counted per corpus file.
// NOTE(review): these patterns carry the `g` flag and are reused across files;
// confirm countRegex does not depend on a regex's lastIndex state.
const riskPatterns = [
{ id: "empty-catch", description: "Empty catch blocks", pattern: /catch\s*\(\s*[^)]*\)\s*\{\s*\}/g },
{ id: "console-error", description: "Console error logging", pattern: /\bconsole\.error\s*\(/g },
{ id: "forced-exit", description: "Process exit usage", pattern: /\bprocess\.exit\s*\(/g },
{ id: "throws-string", description: "Throwing non-Error values", pattern: /\bthrow\s+['"`]/g },
];
// Keep only risks with at least one hit; each row lists at most 8 files.
const riskRows = riskPatterns
.map(risk => {
const perFile = corpus
.map(file => ({ filePath: file.path, count: this.countRegex(file.content, risk.pattern) }))
.filter(hit => hit.count > 0);
return {
id: risk.id,
description: risk.description,
totalHits: perFile.reduce((acc, hit) => acc + hit.count, 0),
files: perFile.slice(0, 8),
};
})
.filter(risk => risk.totalHits > 0);
summary = "Recorded baseline risks/assumptions/constraints before transformation.";
// Two baseline risks are always recorded, followed by one entry per detected pattern.
addUnique(understanding.risks, "Cross-file regressions are possible without full symbol coverage.");
addUnique(understanding.risks, "Validation should run after each transform step.");
for (const risk of riskRows) {
addUnique(understanding.risks, `${risk.description}: ${risk.totalHits} hit(s)`);
}
// Snapshot (first 12 entries each) of the understanding lists as they stand now.
collectedData = {
...collectedData,
risks: understanding.risks?.slice(0, 12) ?? [],
assumptions: understanding.assumptions?.slice(0, 12) ?? [],
constraints: understanding.constraints?.slice(0, 12) ?? [],
riskSignals: riskRows,
};
break;
}
case "research-architecture-synthesis": {
// Prefer files flagged semanticAnalyzed; otherwise fall back to the corpus paths. First 8 only.
const analyzedPaths = Object.entries(fileAnalysis)
.filter(([_, fa]) => fa?.semanticAnalyzed)
.map(([filePath]) => filePath);
const architectureFiles = (analyzedPaths.length > 0 ? analyzedPaths : corpus.map(file => file.path)).slice(0, 8);
understanding.problemStatement =
`Summarize repository architecture and identify weak coupling points across ${selectedFiles.length} scoped file(s).`;
// Name-based heuristics: only the file's basename is inspected for "registry" / "module".
for (const p of architectureFiles) {
const base = path.basename(p);
if (base.toLowerCase().includes("registry")) {
addUnique(understanding.hotspots, `${base}: central registry point with broad module fan-in.`);
addUnique(understanding.couplingPoints, `${base}: centralized module registration coupling.`);
}
if (base.toLowerCase().includes("module")) {
addUnique(understanding.sharedPatterns, `${base}: module-oriented pipeline pattern.`);
}
}
// These three statements are added unconditionally, regardless of the files inspected.
addUnique(understanding.sharedPatterns, "Pipeline modules follow a shared Module/ModuleIO contract.");
addUnique(understanding.couplingPoints, "Shared config/model utilities create cross-module coupling.");
addUnique(understanding.hotspots, "Core orchestration and registry layers are high-impact change zones.");
summary = `Architecture synthesis completed from ${architectureFiles.length} analyzed file(s).`;
// Summaries of research steps on this task that are already completed.
const priorResearch = (this.context.task?.taskSteps ?? [])
.filter(step => step.action?.startsWith("research-") && step.status === "completed")
.map(step => ({
action: step.action,
summary: step.result?.research?.summary,
}));
collectedData = {
...collectedData,
architectureInputFiles: architectureFiles,
priorResearchSummaries: priorResearch,
problemStatement: understanding.problemStatement ?? "",
sharedPatterns: understanding.sharedPatterns?.slice(0, 12) ?? [],
hotspots: understanding.hotspots?.slice(0, 12) ?? [],
couplingPoints: understanding.couplingPoints?.slice(0, 12) ?? [],
};
break;
}
default: {
// Unknown actions get a warning payload but are still marked completed below.
summary = `Unknown research action: ${taskStep.action}`;
collectedData = {
...collectedData,
warning: "No handler for research action",
};
break;
}
}
const completedAt = new Date().toISOString();
const researchEntry = {
action: taskStep.action,
summary,
collectedData,
selectedFileCount: selectedFiles.length,
completedAt,
};
// Attach the result to the step and record a fixed "complete" decision (confidence 0.95).
taskStep.result || (taskStep.result = {});
taskStep.result.research = researchEntry;
taskStep.result.stepReasoning = {
nextAction: "complete",
rationale: `Research step completed: ${summary}`,
confidence: 0.95,
};
taskStep.status = "completed";
// Mirror the entry into analysis.researchArtifacts and log the outcome.
this.persistResearchArtifact(researchEntry);
logInputOutput("runResearchStep", "output", {
research: researchEntry,
stepReasoning: taskStep.result.stepReasoning,
status: taskStep.status,
});
}
/**
* Persists normalized research outputs into analysis.researchArtifacts.
* Example: latestByAction["research-risk-check"] stores current risk findings.
*/
persistResearchArtifact(entry) {
var _a, _b;
(_a = this.context).analysis || (_a.analysis = {});
const store = (_b = this.context.analysis).researchArtifacts || (_b.researchArtifacts = {
latestByAction: {},
history: [],
touchedFiles: [],
lastUpdatedAt: entry.completedAt,
});
store.latestByAction || (store.latestByAction = {});
store.history || (store.history = []);
store.touchedFiles || (store.touchedFiles = []);
store.latestByAction[entry.action] = entry;
store.history.push(entry);
const data = entry.collectedData ?? {};
const touched = this.extractPathsFromResearchData(data);
const merged = new Set([...(store.touchedFiles ?? []), ...touched]);
store.touchedFiles = Array.from(merged);
store.lastUpdatedAt = entry.completedAt;
}
/**
* Extracts file paths from heterogeneous research payloads.
* Example: impactMap rows and architectureInputFiles are both merged into touchedFiles.
*/
extractPathsFromResearchData(data) {
const paths = new Set();
const addPath = (value) => {
if (typeof value === "string" && value.trim().length > 0) {
paths.add(value);
}
};
const addPathArray = (value) => {
if (!Array.isArray(value))
return;
for (const item of value) {
addPath(item);
}
};
addPathArray(data.corpusPaths);
addPathArray(data.touchedFiles);
addPathArray(data.architectureInputFiles);
if (Array.isArray(data.impactMap)) {
for (const row of data.impactMap) {
addPath(row.filePath);
}
}
if (Array.isArray(data.symbolTrace)) {
for (const row of data.symbolTrace) {
const files = row.files;
if (!Array.isArray(files))
continue;
for (const fileRow of files) {
addPath(fileRow.filePath);
}
}
}
if (Array.isArray(data.riskSignals)) {
for (const row of data.riskSignals) {
const files = row.files;
if (!Array.isArray(files))
continue;
for (const fileRow of files) {
addPath(fileRow.filePath);
}
}
}
return Array.from(paths);
}
/**
* Builds lightweight query terms for deterministic research scanning.
* Example: "error handling test suite" -> ["error","handling","test","suite"].
*/
buildResearchTerms() {
const query = this.context.analysis?.intent?.normalizedQuery ??
this.context.initContext?.userQuery ??
this.query;
const stopWords = new Set([
"the", "and", "for", "with", "from", "this", "that", "what", "how",
"is", "are", "was", "were", "can", "could", "should", "would", "into",
"about", "across", "repo", "codebase", "please",
]);
return Array.from(new Set(query
.toLowerCase()
.split(/[^a-z0-9_]+/g)
.filter(token => token.length >= 3 && !stopWords.has(token)))).slice(0, 10);
}
/**
* Collects research candidate paths from selected, candidate, related, and working files.
* Example: selected files are prioritized before broader related file pool.
*/
collectResearchPaths(maxPaths) {
const focus = this.context.analysis?.focus;
const workingPaths = (this.context.workingFiles ?? []).map(file => file.path);
const related = this.context.initContext?.relatedFiles ?? [];
const combined = [
...(focus?.selectedFiles ?? []),
...(focus?.candidateFiles ?? []),
...workingPaths,
...related,
];
const unique = Array.from(new Set(combined));
return unique
.filter(filePath => !filePath.startsWith("__research__/") && fs.existsSync(filePath))
.slice(0, maxPaths);
}
/**
* Reads a bounded corpus from candidate paths.
* Example: read first 12 files, max 12k chars per file, skipping binary payloads.
*/
loadResearchCorpus(filePaths, maxFiles, maxCharsPerFile) {
const corpus = [];
for (const filePath of filePaths.slice(0, maxFiles)) {
try {
const raw = fs.readFileSync(filePath, "utf-8");
if (raw.includes("\u0000"))
continue;
const content = raw.slice(0, maxCharsPerFile);
corpus.push({
path: filePath,
content,
lineCount: content.split("\n").length,
charCount: content.length,
});
}
catch {
// Ignore unreadable files and continue.
}
}
return corpus;
}
/**
* Counts regex matches safely.
* Example: countRegex(code, /import/g) -> number of import occurrences.
*/
countRegex(content, pattern) {
const source = pattern.source;
const flags = pattern.flags.includes("g") ? pattern.flags : `${pattern.flags}g`;
const re = new RegExp(source, flags);
return Array.from(content.matchAll(re)).length;
}
/**
* Computes per-term match counts for a file body.
* Example: terms ["error","test"] -> { error: 4, test: 2 }.
*/
computeTermHits(content, terms) {
const hits = {};
for (const term of terms) {
const escaped = term.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
const count = this.countRegex(content, new RegExp(`\\b${escaped}\\b`, "gi"));
if (count > 0) {
hits[term] = count;
}
}
return hits;
}
/* ───────────── step executor ───────────── */
/**
* Executes a single step using its corresponding module.
*
* @param step - The step to execute.
* @param input - The input data for the step.
* @returns A promise resolving to the output of the step.
* @throws If the module is not found or execution fails.
*/
async executeStep(step, input) {
const stop = this.startTimer();
this.context.currentStep = step;
const mod = resolveModuleForAction(step.action);
if (!mod) {
this.logLine("EXECUTE", step.action, stop(), "skipped (missing module)");
return { query: input.query, content: input.content, data: { skipped: true } };
}
try {
this.ui.update(`Running step: ${step.action}`);
const output = await mod.run({ query: step.description ?? input.query, content: input.data ?? input.content, context: this.context });
const errors = Array.isArray(output.data?.errors)
? output.data.errors.filter((e) => typeof e === "string" && e.trim().length > 0)
: [];
if (errors.length > 0) {
const detail = errors.slice(0, 2).join(" | ");
this.logLine("EXECUTE", step.action, stop(), `completed with errors: ${detail}`);
console.error(`[${step.action}] ${errors.join(" | ")}`);
}
return output;
}
catch (err) {
this.logLine("EXECUTE", step.action, stop(), "failed");
throw err;
}
}
/* ───────────── extracted from runSearch ───────────── */
resolveInitialRetrievalQueries() {
const rawUserQuery = this.context.initContext?.userQuery ?? this.query;
const retrievalQuery = this.context.analysis?.intent?.normalizedQuery?.trim() || rawUserQuery;
return { rawUserQuery, retrievalQuery };
}
async fetchInitialRetrievalResults(retrievalQuery) {
return semanticSearchFiles(retrievalQuery, RELATED_FILES_LIMIT, this.context.analysis?.intent ?? {});
}
mapSearchResultToTopFile(result) {
return {
id: result.id,
path: result.path,
summary: result.summary ?? undefined,
bm25Score: result.bm25Score,
};
}
mapSearchResultToRelatedFile(result) {
return {
id: result.id,
path: result.path,
summary: result.summary ?? undefined,
bm25Score: result.bm25Score,
};
}
buildInitialRetrievalPromptArgs(results, retrievalQuery) {
const topFiles = results
.slice(0, NUM_TOPFILES)
.map(result => this.mapSearchResultToTopFile(result));
const relatedFiles = results
.slice(NUM_TOPFILES)
.map(result => this.mapSearchResultToRelatedFile(result));
const queryExpansionTerms = results.find(result => Array.isArray(result.queryExpansionTerms))?.queryExpansionTerms;
return {
topFiles,
relatedFiles,
query: retrievalQuery,
queryExpansionTerms,
};
}
mergeSeededInitialContext(rawUserQuery, seededContext) {
// Merge retrieval seed into initContext without losing previously discovered files.
// Example: keep old relatedFiles and append newly seeded files from buildLightContext.
const existingInit = this.context.initContext ?? { userQuery: rawUserQuery };
const seededInit = seededContext.initContext;
const mergedRelatedFiles = Array.from(new Set([
...(existingInit.relatedFiles ?? []),
...(seededInit?.relatedFiles ?? []),
]));
const mergedScores = {
...(existingInit.relatedFileScores ?? {}),
...(seededInit?.relatedFileScores ?? {}),
};
const mergedQueryExpansionTerms = Array.from(new Set([
...(existingInit.queryExpansionTerms ?? []),
...(seededInit?.queryExpansionTerms ?? []),
]));
this.context.initContext = {
...existingInit,
...(seededInit ?? {}),
userQuery: rawUserQuery,
relatedFiles: mergedRelatedFiles,
relatedFileScores: mergedScores,
queryExpansionTerms: mergedQueryExpansionTerms,
folderCapsules: (seededInit?.folderCapsules?.length
? seededInit.folderCapsules
: existingInit.folderCapsules) ?? [],
};
return mergedRelatedFiles.length;
}
applyDeterministicPreGroundingPrefilter(retrievalQuery) {
// Rank and cap retrieval candidates before grounding to reduce noisy evidence passes.
// Example: explicit filename anchors are always kept even if BM25 score is low.
const init = this.context.