lynkr
Version:
Self-hosted LLM gateway and tier-routing proxy for Claude Code, Cursor, and Codex. Routes across Ollama, AWS Bedrock, OpenRouter, Databricks, Azure OpenAI, llama.cpp, and LM Studio with prompt caching, MCP tools, and 60-80% cost savings.
1,630 lines (1,502 loc) • 49 kB
JavaScript
const path = require("path");
const fsp = require("fs/promises");
const fg = require("fast-glob");
const db = require("../db");
const {
workspaceRoot,
resolveWorkspacePath,
writeFile: writeWorkspaceFile,
} = require("../workspace");
const { runProcess } = require("../tools/process");
const { parseFile } = require("./parser");
const { analyzeFile } = require("./navigation");
const logger = require("../logger");
const { getTestSummary } = require("../tests");
// Glob patterns used when a caller supplies none: match every path.
const DEFAULT_GLOBS = ["**/*"];
// Ignore globs that are always applied; caller-supplied ignores are appended to these.
const DEFAULT_IGNORE = ["node_modules/**", ".git/**", "data/**", "tmp/**"];
// Default byte ceiling used by readFileExcerpt; larger files are not read.
const MAX_FALLBACK_FILE_SIZE = 1024 * 1024; // 1MB
// NOTE(review): not referenced in the visible part of this file — presumably a search result cap; confirm.
const MAX_RESULTS = 200;
// Files larger than this are recorded in the index but not read for symbol/dependency extraction.
const MAX_SYMBOL_FILE_SIZE = 512 * 1024; // 512KB
// Default cap on the number of edges/nodes returned by computeDependencyGraph.
const DEFAULT_GRAPH_LIMIT = 250;
// First line of a generated CLAUDE.md; ensureClaudeDoc only overwrites files that start with it.
const CLAUDE_DOC_HEADER = "<!-- Generated by claude-code indexer -->";
// File extensions (without the dot) that computeDependencyGraph accepts on both ends of an edge.
const COMMON_DEP_EXTENSIONS = [
"js",
"jsx",
"ts",
"tsx",
"mjs",
"cjs",
"json",
"py",
"rb",
"go",
"rs",
"java",
"cs",
"cpp",
"c",
"h",
"hpp",
"swift",
"kt",
"kts",
"scala",
"sql",
"md",
];
// Predicate factory: matches when the basename of `info.path` satisfies `pattern`,
// or when `info.path` is exactly one of `exactPaths`.
const styleRuleByBasename = (pattern, exactPaths = []) => (info) =>
  pattern.test(path.basename(info.path)) || exactPaths.includes(info.path);

// Predicate factory: matches only when `info.path` is exactly one of `candidates`.
const styleRuleByPath = (...candidates) => (info) => candidates.includes(info.path);

/**
 * Table of style/lint configuration detectors.
 * Each rule has a `match(info)` predicate over a file-info record (only
 * `info.path` is inspected), the `tool` identifier reported for a hit, and a
 * human-readable `detail` string.
 */
const STYLE_GUIDE_RULES = [
  {
    match: (info) => path.basename(info.path) === ".editorconfig",
    tool: "editorconfig",
    detail: "EditorConfig configuration",
  },
  {
    match: styleRuleByBasename(/^\.eslintrc(\..*)?$/, [
      "eslint.config.js",
      "eslint.config.cjs",
      "eslint.config.mjs",
    ]),
    tool: "eslint",
    detail: "ESLint configuration",
  },
  {
    match: styleRuleByBasename(/^\.?prettierrc(\..*)?$/, [
      "prettier.config.js",
      "prettier.config.cjs",
      "prettier.config.mjs",
    ]),
    tool: "prettier",
    detail: "Prettier configuration",
  },
  {
    match: styleRuleByBasename(/^\.stylelintrc(\..*)?$/, ["stylelint.config.js"]),
    tool: "stylelint",
    detail: "Stylelint configuration",
  },
  {
    match: styleRuleByPath(".clang-format", ".clang-tidy"),
    tool: "clang_format",
    detail: "Clang formatting configuration",
  },
  {
    match: styleRuleByPath(".flake8", ".pylintrc", ".pep8"),
    tool: "python_lint",
    detail: "Python lint configuration",
  },
  {
    match: styleRuleByPath("pyproject.toml"),
    tool: "pyproject",
    detail: "Python pyproject configuration",
  },
  {
    match: styleRuleByPath("setup.cfg"),
    tool: "python_setup_cfg",
    detail: "Python setup.cfg configuration",
  },
];
/**
 * Runs every STYLE_GUIDE_RULES predicate against every file record and
 * returns the distinct (tool, path) hits sorted by tool and then path.
 * A rule that throws is logged at debug level and treated as a non-match.
 * When nothing matches, a single "general" placeholder entry is returned.
 *
 * @param {Array<{path: string}>} fileInfos file records to inspect
 * @returns {Array<{tool: string, path: (string|null), detail: string}>}
 */
function inferStyleGuides(fileInfos) {
  const ruleApplies = (rule, info) => {
    try {
      return rule.match(info);
    } catch (err) {
      logger.debug({ err, file: info.path }, "Failed evaluating style guide rule");
      return false;
    }
  };

  // Keyed by "tool:path" so repeated file records produce a single entry.
  const detected = new Map();
  for (const info of fileInfos) {
    for (const rule of STYLE_GUIDE_RULES) {
      if (!ruleApplies(rule, info)) continue;
      const dedupeKey = `${rule.tool}:${info.path}`;
      if (detected.has(dedupeKey)) continue;
      detected.set(dedupeKey, { tool: rule.tool, path: info.path, detail: rule.detail });
    }
  }

  const guides = [...detected.values()];
  guides.sort(
    (left, right) => left.tool.localeCompare(right.tool) || left.path.localeCompare(right.path),
  );
  if (guides.length > 0) return guides;

  guides.push({
    tool: "general",
    path: null,
    detail: "No explicit style guides detected. Consider adding lint/format tooling for key languages.",
  });
  return guides;
}
/**
 * Turns detected style guides into one-line, human-readable insights.
 *
 * @param {Array<{tool: string, path: (string|null), detail: string}>} styleGuides
 * @param {Iterable<string>} [languages] language identifiers present in the workspace
 * @returns {string[]} one line per guide, plus a warning when "python" is
 *   present but no guide's tool name starts with "python"
 */
function synthesiseStyleGuideInsights(styleGuides, languages) {
  const hasGuides = Array.isArray(styleGuides) && styleGuides.length > 0;
  if (!hasGuides) {
    return ["No style configuration detected."];
  }

  const pythonPresent = new Set(languages ?? []).has("python");
  const insights = styleGuides.map((guide) =>
    guide.path
      ? `${guide.tool}: ${guide.path} (${guide.detail})`
      : `${guide.tool}: ${guide.detail}`,
  );

  // Only tools whose identifier begins with "python" count as Python lint/format config.
  if (pythonPresent && !styleGuides.some((guide) => guide.tool.startsWith("python"))) {
    insights.push("Python present but no lint/format configuration detected.");
  }
  return insights;
}
/**
 * Coerces a caller-supplied glob specification into an array of strings.
 * A non-empty string becomes a one-element array, a non-empty array has each
 * entry stringified, and anything else yields DEFAULT_GLOBS.
 *
 * @param {string|Array|undefined|null} patterns
 * @returns {string[]}
 */
function normalisePatterns(patterns) {
  const isNonEmptyString = typeof patterns === "string" && patterns.length > 0;
  if (isNonEmptyString) {
    return [patterns];
  }
  const isNonEmptyArray = Array.isArray(patterns) && patterns.length > 0;
  return isNonEmptyArray ? patterns.map((entry) => String(entry)) : DEFAULT_GLOBS;
}
/**
 * Builds the effective ignore list: DEFAULT_IGNORE followed by any
 * caller-supplied globs (a single string or an array, stringified).
 * Falsy or unsupported input returns DEFAULT_IGNORE itself.
 *
 * @param {string|Array|undefined|null} ignore
 * @returns {string[]}
 */
function normaliseIgnore(ignore) {
  let extras = null;
  if (typeof ignore === "string" && ignore !== "") {
    extras = [ignore];
  } else if (Array.isArray(ignore)) {
    extras = ignore.map((glob) => String(glob));
  }
  return extras === null ? DEFAULT_IGNORE : DEFAULT_IGNORE.concat(extras);
}
/**
 * Lists workspace entries matching the given globs.
 *
 * @param {object} [options]
 * @param {string|string[]} [options.patterns] globs to match (see normalisePatterns)
 * @param {string|string[]} [options.ignore] extra ignore globs (see normaliseIgnore)
 * @param {number} [options.limit] positive integer cap on returned entries; defaults to 1000
 * @param {boolean} [options.includeDirectories] when exactly `true`, directories are listed too
 * @param {boolean} [options.withStats] when truthy, each entry is stat'ed and size/mtime are included
 * @returns {Promise<Array<object>>} `{path, type}` records, or `{path, type, size, mtimeMs}`
 *   with stats; entries that fail to stat are logged and omitted
 */
async function listWorkspaceFiles(options = {}) {
  const globs = normalisePatterns(options.patterns);
  const ignored = normaliseIgnore(options.ignore);
  const maxEntries =
    Number.isInteger(options.limit) && options.limit > 0 ? options.limit : 1000;
  const wantDirectories = options.includeDirectories === true;

  const matched = await fg(globs, {
    cwd: workspaceRoot,
    ignore: ignored,
    dot: false,
    onlyFiles: !wantDirectories,
    markDirectories: wantDirectories,
    unique: true,
    followSymbolicLinks: false,
  });
  const page = matched.slice(0, maxEntries);

  if (options.withStats) {
    const detailed = [];
    for (const entry of page) {
      try {
        const stats = await fsp.stat(resolveWorkspacePath(entry));
        detailed.push({
          path: entry,
          type: stats.isDirectory() ? "directory" : "file",
          size: stats.size,
          mtimeMs: stats.mtimeMs,
        });
      } catch (err) {
        logger.warn({ err, entry }, "Failed to stat workspace entry");
      }
    }
    return detailed;
  }

  // Without stats, the type is inferred from a trailing "/" on the entry.
  return page.map((entry) => ({
    path: entry,
    type: entry.endsWith("/") ? "directory" : "file",
  }));
}
/**
 * Parses newline-delimited JSON (as produced by `rg --json`) into match records.
 * Blank lines, unparseable lines, non-"match" events and matches without a
 * textual path or line text are skipped.
 *
 * @param {string} stdout raw output, one JSON document per line
 * @param {number} limit maximum number of records to return
 * @returns {Array<{path: string, line: number, column: (number|null), match: string}>}
 *   `path` is relative to the workspace root; `column` is the `start` of the first submatch
 */
function parseRipgrepJson(stdout, limit) {
  const hits = [];
  for (const rawLine of stdout.split("\n")) {
    if (rawLine.trim().length === 0) continue;
    if (hits.length >= limit) break;

    let event;
    try {
      event = JSON.parse(rawLine);
    } catch {
      continue;
    }
    if (event.type !== "match") continue;

    const payload = event.data;
    const reportedPath = payload?.path?.text;
    const lineText = payload?.lines?.text;
    const firstSubmatch = (payload?.submatches ?? [])[0];
    if (!reportedPath || typeof lineText !== "string") continue;

    const absolutePath = path.resolve(workspaceRoot, reportedPath);
    hits.push({
      path: path.relative(workspaceRoot, absolutePath),
      line: payload.line_number,
      column: firstSubmatch?.start ?? null,
      match: lineText.trimEnd(),
    });
  }
  return hits;
}
/**
 * Searches the workspace with ripgrep and returns parsed match records.
 *
 * @param {object} params
 * @param {string} params.query pattern to search for
 * @param {boolean} [params.regex] when falsy, the query is matched literally (`--fixed-strings`)
 * @param {number} params.limit passed as `--max-count` and used to cap the parsed results
 * @param {string[]} [params.ignore] globs to exclude; each is passed as `--glob=!<glob>`
 * @returns {Promise<Array<object>>} records produced by parseRipgrepJson
 * @throws {Error} when ripgrep exits with a code other than 0 or 1; the error
 *   carries `exitCode`, `stdout` and `stderr`
 */
async function searchWithRipgrep({ query, regex, limit, ignore }) {
  const args = [
    "--json",
    "--no-heading",
    "--hidden",
    "--line-number",
    "--column",
    "--color=never",
    `--max-count=${limit}`,
  ];
  for (const glob of ignore ?? []) {
    args.push(`--glob=!${glob}`);
  }
  if (!regex) {
    args.push("--fixed-strings");
  }
  // "--" ends option parsing, so a query that starts with "-" (for example
  // "--pre=cmd") is treated as the search pattern and never as a ripgrep flag.
  args.push("--", String(query), ".");

  const result = await runProcess({
    command: "rg",
    args,
    cwd: workspaceRoot,
    env: {},
    timeoutMs: 10000,
  });

  // The code treats exit codes 0 and 1 as success; any other code is raised as an error.
  if (result.exitCode !== 0 && result.exitCode !== 1) {
    const error = new Error("ripgrep returned an error.");
    error.exitCode = result.exitCode;
    error.stdout = result.stdout;
    error.stderr = result.stderr;
    throw error;
  }
  return parseRipgrepJson(result.stdout, limit);
}
/**
 * Reads a workspace file as UTF-8 text, provided it is a regular file no
 * larger than `limitBytes`.
 *
 * @param {string} relativePath path passed to resolveWorkspacePath
 * @param {number} [limitBytes=MAX_FALLBACK_FILE_SIZE] maximum file size to read
 * @returns {Promise<string|null>} the whole file content, or null when the
 *   entry is not a file, exceeds the limit, or any error occurs (logged as a warning)
 */
async function readFileExcerpt(relativePath, limitBytes = MAX_FALLBACK_FILE_SIZE) {
  try {
    const target = resolveWorkspacePath(relativePath);
    const info = await fsp.stat(target);
    const readable = info.isFile() && !(info.size > limitBytes);
    if (!readable) {
      return null;
    }
    return await fsp.readFile(target, "utf8");
  } catch (err) {
    logger.warn({ err, relativePath }, "Failed to read file during fallback search");
    return null;
  }
}
// Maps a lower-case file extension (without the dot) to the language identifier
// used by the index; inferLanguage looks extensions up in this table.
const LANGUAGE_EXTENSIONS = {
js: "javascript",
mjs: "javascript",
cjs: "javascript",
ts: "typescript",
tsx: "typescript-react",
jsx: "javascript-react",
py: "python",
rb: "ruby",
java: "java",
go: "go",
rs: "rust",
php: "php",
cs: "csharp",
cpp: "cpp",
cxx: "cpp",
cc: "cpp",
h: "c-header",
hpp: "cpp-header",
json: "json",
yaml: "yaml",
yml: "yaml",
md: "markdown",
sh: "shell",
bash: "shell",
zsh: "shell",
fish: "shell",
swift: "swift",
kt: "kotlin",
kts: "kotlin",
scala: "scala",
sql: "sql",
};
// Every pattern below has exactly two capture groups: (1) the text that
// precedes the symbol name within the match and (2) the name itself. The
// length of group 1 gives the name's exact position on the line.
const JS_FAMILY_SYMBOL_PATTERNS = [
  [/(\bfunction\s+)([A-Za-z0-9_$]+)\s*\(/, "function"],
  [/(\bclass\s+)([A-Za-z0-9_$]+)/, "class"],
  [/(\bconst\s+)([A-Za-z0-9_$]+)\s*=\s*(?:async\s*)?\(/, "function"],
];
const TYPE_DECLARATION_SYMBOL_PATTERNS = [
  [/(\bclass\s+)([A-Za-z0-9_]+)/, "class"],
  [/(\binterface\s+)([A-Za-z0-9_]+)/, "interface"],
  [/(\benum\s+)([A-Za-z0-9_]+)/, "enum"],
];
const SYMBOL_PATTERNS_BY_LANGUAGE = new Map([
  ["javascript", JS_FAMILY_SYMBOL_PATTERNS],
  ["javascript-react", JS_FAMILY_SYMBOL_PATTERNS],
  ["typescript", JS_FAMILY_SYMBOL_PATTERNS],
  ["typescript-react", JS_FAMILY_SYMBOL_PATTERNS],
  [
    "python",
    [
      [/^(\s*def\s+)([A-Za-z0-9_]+)\s*\(/, "function"],
      [/^(\s*class\s+)([A-Za-z0-9_]+)/, "class"],
    ],
  ],
  ["go", [[/^(\s*func\s+(?:\([^)]+\)\s*)?)([A-Za-z0-9_]+)\s*\(/, "function"]]],
  ["java", TYPE_DECLARATION_SYMBOL_PATTERNS],
  ["csharp", TYPE_DECLARATION_SYMBOL_PATTERNS],
  [
    "rust",
    [
      [/(\bfn\s+)([A-Za-z0-9_]+)\s*\(/, "function"],
      [/(\bstruct\s+)([A-Za-z0-9_]+)/, "struct"],
      [/(\benum\s+)([A-Za-z0-9_]+)/, "enum"],
    ],
  ],
  [
    "php",
    [
      [/(\bfunction\s+)([A-Za-z0-9_]+)\s*\(/, "function"],
      [/(\bclass\s+)([A-Za-z0-9_]+)/, "class"],
    ],
  ],
  [
    "ruby",
    [
      [/^(\s*def\s+)([A-Za-z0-9_!?]+)/, "method"],
      [/^(\s*class\s+)([A-Za-z0-9_:]+)/, "class"],
      [/^(\s*module\s+)([A-Za-z0-9_:]+)/, "module"],
    ],
  ],
]);

/**
 * Heuristic, regex-based symbol extraction used when no structured parser
 * produced symbols for a file. At most one symbol per pattern per line is
 * reported. Results are grouped by pattern (all matches of the first pattern,
 * then the second, ...), not sorted by line.
 *
 * @param {string} relativePath path of the file (not used by the heuristics)
 * @param {string} content file content
 * @param {string} language language identifier (case-insensitive), e.g. "javascript"
 * @returns {Array<{name: string, kind: string, line: number, column: number, metadata: null}>}
 *   1-based line/column of each symbol name; empty for unsupported languages
 *   or non-string input
 */
function extractSymbols(relativePath, content, language) {
  if (typeof language !== "string" || !language || typeof content !== "string") return [];
  const lang = language.toLowerCase();
  const lines = content.split(/\r?\n/);
  const symbols = [];

  const pushSymbol = (name, kind, lineIndex, column = 1) => {
    if (!name) return;
    symbols.push({ name, kind, line: lineIndex + 1, column, metadata: null });
  };

  if (lang === "markdown") {
    lines.forEach((line, idx) => {
      const match = /^(#+)\s+(.*)$/.exec(line);
      if (!match) return;
      // The heading text runs to the end of the line, so its start is found
      // from the end; searching for the text could hit an earlier "#".
      const titleStart = line.length - match[2].trimStart().length;
      pushSymbol(match[2].trim(), `heading_${match[1].length}`, idx, titleStart + 1);
    });
    return symbols;
  }

  const patterns = SYMBOL_PATTERNS_BY_LANGUAGE.get(lang) ?? [];
  for (const [pattern, kind] of patterns) {
    lines.forEach((line, idx) => {
      const match = pattern.exec(line);
      if (!match || !match[2]) return;
      // match.index + prefix length is the exact offset of the captured name.
      // indexOf(name) would find the first occurrence anywhere on the line,
      // e.g. the "on" inside "function" for `function on()`.
      pushSymbol(match[2], kind, idx, match.index + match[1].length + 1);
    });
  }
  return symbols;
}
// Prepared statements, created once at module load via db.prepare.
// The DELETE statements below empty each index table; rebuildWorkspaceIndex runs
// them at the start of its insert transaction.
const clearFilesStmt = db.prepare("DELETE FROM files");
const clearSymbolsStmt = db.prepare("DELETE FROM symbols");
const clearSymbolRefsStmt = db.prepare("DELETE FROM symbol_references");
const clearFrameworkStmt = db.prepare("DELETE FROM framework_signals");
const clearDependenciesStmt = db.prepare("DELETE FROM file_dependencies");
// Insert-or-replace of a key/value pair in workspace_metadata.
const upsertMetadataStmt = db.prepare(
`INSERT INTO workspace_metadata (key, value)
VALUES (@key, @value)
ON CONFLICT(key) DO UPDATE SET value=excluded.value`,
);
// Reads one workspace_metadata value by key (positional parameter).
const selectMetadataStmt = db.prepare("SELECT value FROM workspace_metadata WHERE key = ?");
// Insert-or-update of a file row keyed by path.
const insertFileStmt = db.prepare(
`INSERT INTO files (path, size_bytes, mtime_ms, language, summary)
VALUES (@path, @size_bytes, @mtime_ms, @language, @summary)
ON CONFLICT(path) DO UPDATE SET
size_bytes=excluded.size_bytes,
mtime_ms=excluded.mtime_ms,
language=excluded.language,
summary=excluded.summary`,
);
// Plain inserts; `metadata` is passed as a JSON string or null by the callers in this file.
const insertFrameworkStmt = db.prepare(
`INSERT INTO framework_signals (type, file_path, detail, metadata)
VALUES (@type, @file_path, @detail, @metadata)`,
);
const insertDependencyStmt = db.prepare(
`INSERT INTO file_dependencies (from_path, to_path, kind, metadata)
VALUES (@from_path, @to_path, @kind, @metadata)`,
);
const insertSymbolStmt = db.prepare(
`INSERT INTO symbols (file_path, name, kind, line, column, metadata)
VALUES (@file_path, @name, @kind, @line, @column, @metadata)`,
);
const insertSymbolReferenceStmt = db.prepare(
`INSERT INTO symbol_references (symbol_id, file_path, line, column, snippet, metadata)
VALUES (@symbol_id, @file_path, @line, @column, @snippet, @metadata)`,
);
// Finds the definitions referenced at a given file/line (and column, when @column is not NULL),
// by joining symbol_references to symbols. Results are ordered by column distance, then name.
// NOTE(review): when @column is supplied the WHERE clause already requires an exact column
// match, so column_distance only varies when @column is NULL — confirm this is intended.
const selectDefinitionByLocationStmt = db.prepare(
`SELECT s.name,
s.kind,
s.file_path AS definition_path,
s.line AS definition_line,
s.column AS definition_column,
r.file_path AS reference_path,
r.line AS reference_line,
r.column AS reference_column,
r.snippet,
ABS(COALESCE(r.column, 0) - COALESCE(@column, 0)) AS column_distance
FROM symbol_references r
JOIN symbols s ON r.symbol_id = s.id
WHERE r.file_path = @filePath
AND r.line = @line
AND (@column IS NULL OR r.column = @column)
ORDER BY column_distance ASC,
s.name ASC
LIMIT @limit`,
);
// Looks up symbol definitions by exact name, ordered by line and then file path.
const selectDefinitionsBySymbolStmt = db.prepare(
`SELECT name,
kind,
file_path,
line,
column,
metadata
FROM symbols
WHERE name = @name
ORDER BY line ASC, file_path ASC
LIMIT @limit`,
);
/**
 * Infers the language identifier of a file from its extension.
 *
 * @param {string} relativePath file path; only the extension is inspected
 * @returns {string|null} the LANGUAGE_EXTENSIONS entry for the lower-cased
 *   extension, or null when the extension is unknown or absent
 */
function inferLanguage(relativePath) {
  const ext = path.extname(relativePath).replace(".", "").toLowerCase();
  // Own-property check: a plain lookup would return inherited members of
  // Object.prototype for files such as "x.constructor" or "x.__proto__".
  if (!Object.prototype.hasOwnProperty.call(LANGUAGE_EXTENSIONS, ext)) {
    return null;
  }
  return LANGUAGE_EXTENSIONS[ext] ?? null;
}
/**
 * Produces a compact dependency summary from a parsed package.json.
 *
 * @param {object|null|undefined} packageJson parsed manifest
 * @returns {{sampleDependencies: string[], totalDependencies: number,
 *   totalDevDependencies: number}|null} up to five dependency names followed
 *   by up to three devDependency names, with totals; null when the manifest
 *   is missing or lists neither kind
 */
function summariseDependencies(packageJson) {
  if (!packageJson) return null;

  const runtimeNames = Object.keys(packageJson.dependencies ?? {});
  const devNames = Object.keys(packageJson.devDependencies ?? {});
  const sample = runtimeNames.slice(0, 5).concat(devNames.slice(0, 3));

  return sample.length > 0
    ? {
        sampleDependencies: sample,
        totalDependencies: runtimeNames.length,
        totalDevDependencies: devNames.length,
      }
    : null;
}
/**
 * Escapes every regular-expression metacharacter in `value` with a backslash
 * so the result matches the input literally when embedded in a RegExp.
 *
 * @param {string} value text to escape
 * @returns {string}
 */
function escapeRegex(value) {
  const metacharacters = /[.*+?^${}()|[\]\\]/g;
  return value.replace(metacharacters, (special) => `\\${special}`);
}
// Regex pattern cache for performance
const regexCache = new Map();
const MAX_REGEX_CACHE_SIZE = 10000;

/**
 * Returns a cached, global word-boundary regex (`\b<name>\b`) for a symbol name.
 * The same RegExp instance is handed out on every call for a given name; its
 * `lastIndex` is reset to 0 each time so a caller always starts from the
 * beginning of its input.
 *
 * @param {string} symbolName literal symbol name (escaped before compilation)
 * @returns {RegExp}
 */
function getCachedRegex(symbolName) {
  let regex = regexCache.get(symbolName);
  if (regex === undefined) {
    regex = new RegExp(`\\b${escapeRegex(symbolName)}\\b`, "g");
    regexCache.set(symbolName, regex);
    // Prevent unbounded growth: drop the oldest entry (Map keeps insertion order).
    if (regexCache.size > MAX_REGEX_CACHE_SIZE) {
      const oldestKey = regexCache.keys().next().value;
      regexCache.delete(oldestKey);
    }
  }
  // A /g regex is stateful. If a previous caller stopped iterating before
  // exec() returned null, lastIndex is still set and the next scan would
  // start mid-string and skip matches.
  regex.lastIndex = 0;
  return regex;
}
/**
 * Parses a JSON string without throwing.
 *
 * @param {*} value candidate JSON text
 * @param {*} [fallback=null] value returned when `value` is not a string or fails to parse
 * @returns {*} the parsed value, or `fallback` (parse failures are logged at debug level)
 */
function safeParseJson(value, fallback = null) {
  // null, undefined and every other non-string input fall through to the fallback.
  if (typeof value === "string") {
    try {
      return JSON.parse(value);
    } catch (err) {
      logger.debug({ err }, "Failed to parse JSON metadata");
    }
  }
  return fallback;
}
/**
 * Detects ecosystems/frameworks from well-known manifest files at the
 * workspace root.
 *
 * For package.json, React/Next.js/Express are detected by scanning ALL
 * dependency and devDependency names in the parsed manifest, not only the
 * short sample kept for metadata.
 *
 * @param {Array<{path: string, packageJson?: object}>} fileInfos indexed file
 *   records; `packageJson` is the parsed manifest when available
 * @returns {{frameworks: string[], signals: Array<{type: string,
 *   file_path: string, detail: string, metadata: (string|null)}>}}
 *   distinct framework types in first-seen order, and one signal per
 *   detection (`metadata` is a JSON string or null)
 */
function detectFrameworks(fileInfos) {
  const frameworks = new Set();
  const signals = [];
  const addSignal = (type, filePath, detail, metadata) => {
    frameworks.add(type);
    signals.push({
      type,
      file_path: filePath,
      detail,
      metadata: metadata ? JSON.stringify(metadata) : null,
    });
  };
  const fileMap = new Map(fileInfos.map((info) => [info.path, info]));

  if (fileMap.has("package.json")) {
    const info = fileMap.get("package.json");
    const metadata = summariseDependencies(info.packageJson);
    addSignal("node", info.path, "package.json detected", metadata);

    const manifest =
      info.packageJson && typeof info.packageJson === "object" ? info.packageJson : {};
    const dependencyNames = [
      ...Object.keys(manifest.dependencies ?? {}),
      ...Object.keys(manifest.devDependencies ?? {}),
    ];
    // Substring matching is kept from the previous behavior (so e.g.
    // "react-dom" alone still signals React).
    const dependencyFrameworks = [
      ["react", "react", "React dependency detected"],
      ["next", "nextjs", "Next.js dependency detected"],
      ["express", "express", "Express dependency detected"],
    ];
    for (const [needle, type, detail] of dependencyFrameworks) {
      if (dependencyNames.some((dep) => dep.includes(needle))) {
        addSignal(type, info.path, detail, metadata);
      }
    }
  }

  // [files that trigger the signal, framework type, reported path, detail]
  const manifestSignals = [
    [["requirements.txt"], "python", "requirements.txt", "requirements.txt detected"],
    [["pyproject.toml"], "python", "pyproject.toml", "pyproject.toml detected"],
    [["Pipfile"], "python", "Pipfile", "Pipfile detected"],
    [["Gemfile"], "ruby", "Gemfile", "Gemfile detected"],
    [["pom.xml"], "java", "pom.xml", "Maven project detected"],
    [["build.gradle", "build.gradle.kts"], "gradle", "build.gradle(.kts)", "Gradle build file detected"],
    [["go.mod"], "go", "go.mod", "Go module detected"],
    [["Cargo.toml"], "rust", "Cargo.toml", "Cargo crate detected"],
    [["composer.json"], "php", "composer.json", "Composer project detected"],
  ];
  for (const [triggers, type, reportedPath, detail] of manifestSignals) {
    if (triggers.some((file) => fileMap.has(file))) {
      addSignal(type, reportedPath, detail);
    }
  }

  return {
    frameworks: Array.from(frameworks),
    signals,
  };
}
/**
 * Ranks dependency targets by how many edges point at them.
 *
 * @param {Array} fileInfos unused; kept for the existing call signature
 * @param {Array<{to_path: string}>} dependenciesRaw dependency edges
 * @returns {Array<{path: string, count: number}>} up to ten targets in
 *   descending reference order; targets that are empty or start with ".." are ignored
 */
function computeTopDependencies(fileInfos, dependenciesRaw) {
  const tally = new Map();
  for (const edge of dependenciesRaw) {
    const target = edge.to_path;
    if (!target || target.startsWith("..")) continue;
    tally.set(target, (tally.get(target) ?? 0) + 1);
  }

  const ranked = [];
  for (const [target, count] of tally) {
    ranked.push({ path: target, count });
  }
  ranked.sort((left, right) => right.count - left.count);
  return ranked.slice(0, 10);
}
/**
 * Builds a size-limited dependency graph from raw dependency edges.
 *
 * Only edges whose source and target both have an extension listed in
 * COMMON_DEP_EXTENSIONS are considered. Every eligible edge contributes to
 * node degrees and to `totalEdges`/`totalNodes`, but at most `limit` edges
 * are materialised and at most `limit` nodes (highest degree first) are
 * returned. Edges touching a dropped node are filtered out.
 *
 * @param {object} params
 * @param {Array<{from_path: string, to_path: string, kind?: string, metadata?: *}>} params.dependenciesRaw
 *   raw edges; a non-array is treated as empty
 * @param {number} [params.limit=DEFAULT_GRAPH_LIMIT] cap on returned edges and
 *   nodes; non-numeric or non-positive values fall back to the default
 * @returns {{nodes: Array<{id: string, language: (string|null), edgesOut: number, edgesIn: number}>,
 *   edges: Array<{from: string, to: string, kind: string, metadata: *}>,
 *   totalNodes: number, totalEdges: number, limitApplied: boolean}}
 *   `totalNodes`/`totalEdges` describe the full eligible graph;
 *   `limitApplied` is true when any node or edge was left out because of the limit
 */
function computeDependencyGraph({ dependenciesRaw, limit = DEFAULT_GRAPH_LIMIT }) {
  const dependencies = Array.isArray(dependenciesRaw) ? dependenciesRaw : [];
  const maxItems = typeof limit === "number" && limit > 0 ? limit : DEFAULT_GRAPH_LIMIT;
  const supportedExtensions = new Set(COMMON_DEP_EXTENSIONS);
  const extensionOf = (filePath) => path.extname(filePath).replace(".", "");

  const nodes = new Map();
  const nodeFor = (id) => {
    let node = nodes.get(id);
    if (!node) {
      node = { id, language: extensionOf(id) || null, edgesOut: 0, edgesIn: 0 };
      nodes.set(id, node);
    }
    return node;
  };

  // Stored metadata may be a JSON string; unparseable text is preserved under `raw`.
  const decodeMetadata = (raw) => {
    if (raw === null || raw === undefined) return null;
    if (typeof raw !== "string") return raw;
    try {
      return JSON.parse(raw);
    } catch {
      return { raw };
    }
  };

  const edges = [];
  let totalEdges = 0;
  for (const dep of dependencies) {
    if (!dep || !dep.from_path || !dep.to_path) continue;
    const eligible =
      supportedExtensions.has(extensionOf(dep.from_path).toLowerCase()) &&
      supportedExtensions.has(extensionOf(dep.to_path).toLowerCase());
    if (!eligible) continue;

    nodeFor(dep.from_path).edgesOut += 1;
    nodeFor(dep.to_path).edgesIn += 1;
    totalEdges += 1;

    // Past the limit, keep counting so the totals stay accurate, but stop storing edges.
    if (edges.length >= maxItems) continue;
    edges.push({
      from: dep.from_path,
      to: dep.to_path,
      kind: dep.kind ?? "reference",
      metadata: decodeMetadata(dep.metadata),
    });
  }

  const topNodes = Array.from(nodes.values())
    .sort((a, b) => b.edgesOut + b.edgesIn - (a.edgesOut + a.edgesIn))
    .slice(0, maxItems);
  const keptIds = new Set(topNodes.map((node) => node.id));
  const filteredEdges = edges.filter((edge) => keptIds.has(edge.from) && keptIds.has(edge.to));

  return {
    nodes: topNodes,
    edges: filteredEdges,
    totalNodes: nodes.size,
    totalEdges,
    limitApplied: totalEdges > edges.length || nodes.size > topNodes.length,
  };
}
/**
 * Groups framework signals by their `type` (signals without one go under "other").
 *
 * @param {Array<{type?: string}>} [frameworkSignals=[]]
 * @returns {Array<{framework: string, count: number, samples: Array<object>}>}
 *   one entry per type in first-seen order, with up to five sample signals each
 */
function summariseFrameworkSignals(frameworkSignals = []) {
  const byType = new Map();
  frameworkSignals.forEach((signal) => {
    const type = signal.type ?? "other";
    if (!byType.has(type)) {
      byType.set(type, []);
    }
    byType.get(type).push(signal);
  });

  const summaries = [];
  for (const [framework, members] of byType) {
    summaries.push({ framework, count: members.length, samples: members.slice(0, 5) });
  }
  return summaries;
}
/**
 * Renders the project summary as the markdown text of the generated CLAUDE.md.
 * The document always starts with CLAUDE_DOC_HEADER and ends with the
 * re-index guidance; the other sections appear only when the summary has
 * data for them.
 *
 * @param {object} summary project summary as built by buildProjectSummary
 * @returns {string} markdown text, trimmed and terminated by a single newline
 */
function buildClaudeDocContent(summary) {
  const hasItems = (value) => Array.isArray(value) && value.length > 0;
  const out = [
    CLAUDE_DOC_HEADER,
    "",
    "# Project Overview",
    "",
    `- Workspace root: \`${summary.workspaceRoot}\``,
    `- Indexed at: ${summary.indexedAt}`,
    `- Files indexed: ${summary.fileCount}`,
    "",
  ];
  // Appends "heading, blank, body..., blank".
  const addSection = (heading, body) => {
    out.push(heading, "", ...body, "");
  };

  if (hasItems(summary.languageStats)) {
    addSection(
      "## Language Mix",
      summary.languageStats
        .slice(0, 10)
        .map((lang) => `- ${lang.language}: ${lang.files} files (${lang.percentage}%)`),
    );
  }

  // The section is gated on `frameworks`, while its content comes from `frameworkSignals`.
  if (hasItems(summary.frameworks)) {
    const body = [];
    for (const item of summariseFrameworkSignals(summary.frameworkSignals)) {
      body.push(`- **${item.framework}** (${item.count} signals)`);
      for (const signal of item.samples) {
        body.push(` - ${signal.detail} (${signal.file_path})`);
      }
    }
    addSection("## Framework Signals", body);
  }

  if (hasItems(summary.styleGuideInsights)) {
    addSection(
      "## Style Guide Insights",
      summary.styleGuideInsights.map((insight) => `- ${insight}`),
    );
  }

  if (hasItems(summary.topDependencies)) {
    addSection(
      "## Top Workspace Dependencies",
      summary.topDependencies.slice(0, 10).map((dep) => `- ${dep.path} (refs: ${dep.count})`),
    );
  }

  const graph = summary.dependencyGraph;
  if (graph?.edges?.length) {
    addSection("## Dependency Graph Snapshot", [
      `Graph nodes: ${graph.nodes.length} (of ${graph.totalNodes}), edges: ${graph.edges.length} (of ${graph.totalEdges})`,
      "",
      ...graph.edges.slice(0, 15).map((edge) => `- \`${edge.from}\` → \`${edge.to}\` (${edge.kind})`),
    ]);
  }

  addSection("## Re-index Guidance", [
    "Run `workspace_index_rebuild` to refresh this document after making large changes.",
  ]);
  return `${out.join("\n").trim()}\n`;
}
/**
 * Writes the generated CLAUDE.md at the workspace root, unless an existing
 * file is user-authored (does not start with CLAUDE_DOC_HEADER) or already
 * has the same content (ignoring surrounding whitespace).
 *
 * @param {object} summary project summary passed to buildClaudeDocContent
 * @returns {Promise<void>}
 * @throws rethrows any read error other than ENOENT, and any write error
 */
async function ensureClaudeDoc(summary) {
  const rendered = buildClaudeDocContent(summary);

  let current = null;
  try {
    current = await fsp.readFile(resolveWorkspacePath("CLAUDE.md"), "utf8");
  } catch (err) {
    // A missing file simply means there is nothing to compare against.
    if (err.code !== "ENOENT") {
      throw err;
    }
  }

  if (current) {
    if (!current.startsWith(CLAUDE_DOC_HEADER)) {
      logger.debug("Skipping CLAUDE.md overwrite; existing file is user-authored.");
      return;
    }
    if (current.trim() === rendered.trim()) {
      return;
    }
  }

  await writeWorkspaceFile("CLAUDE.md", rendered, { encoding: "utf8", createParents: true });
  logger.info("Updated CLAUDE.md project overview");
}
/**
 * Assembles the project summary object from the indexed file records.
 * The inputs are not modified.
 *
 * @param {object} params
 * @param {Array<{path: string, language: (string|null)}>} params.fileInfos indexed file records
 * @param {{frameworks: string[], signals: Array<object>}} params.frameworks result of detectFrameworks
 * @param {Array<object>} params.dependenciesRaw flattened dependency edges
 * @returns {object} summary with indexedAt (ISO timestamp), workspaceRoot,
 *   fileCount, languages, languageStats (files and percentage of all indexed
 *   files, most common first), frameworks (sorted), frameworkSignals (without
 *   metadata), topDependencies, dependencyGraph, styleGuides,
 *   styleGuideInsights and tests
 */
function buildProjectSummary({ fileInfos, frameworks, dependenciesRaw }) {
  const languageCounts = new Map();
  for (const item of fileInfos) {
    if (item.language) {
      languageCounts.set(item.language, (languageCounts.get(item.language) ?? 0) + 1);
    }
  }
  // Percentages are relative to all indexed files, including those with no language.
  const totalFiles = Math.max(fileInfos.length, 1);
  const languageStats = Array.from(languageCounts, ([language, count]) => ({
    language,
    files: count,
    percentage: Number(((count / totalFiles) * 100).toFixed(2)),
  })).sort((a, b) => b.files - a.files);
  const languages = languageStats.map((item) => item.language);

  const styleGuides = inferStyleGuides(fileInfos);
  const styleGuideInsights = synthesiseStyleGuideInsights(styleGuides, languages);

  return {
    indexedAt: new Date().toISOString(),
    workspaceRoot,
    fileCount: fileInfos.length,
    languages: [...languages],
    languageStats,
    // Sort a copy: Array.prototype.sort works in place and would reorder the caller's array.
    frameworks: [...frameworks.frameworks].sort(),
    frameworkSignals: frameworks.signals.map((signal) => ({
      type: signal.type,
      file_path: signal.file_path,
      detail: signal.detail,
    })),
    topDependencies: computeTopDependencies(fileInfos, dependenciesRaw),
    dependencyGraph: computeDependencyGraph({ dependenciesRaw }),
    styleGuides,
    styleGuideInsights,
    tests: getTestSummary({ includeRecent: false }),
  };
}
/**
 * Persists the project summary in workspace metadata: the summary itself as
 * JSON under "project_summary", and the current epoch-millisecond time (as a
 * string) under "last_indexed_at".
 *
 * @param {object} summary JSON-serialisable project summary
 */
function storeProjectSummary(summary) {
  const persist = (key, value) => upsertMetadataStmt.run({ key, value });
  persist("project_summary", JSON.stringify(summary));
  persist("last_indexed_at", String(Date.now()));
}
/**
 * Loads the stored project summary from workspace metadata.
 *
 * @returns {object|null} the parsed summary, or null when no row exists or
 *   the stored value is not valid JSON (logged as a warning)
 */
function readProjectSummary() {
  const stored = selectMetadataStmt.get("project_summary");
  if (stored) {
    try {
      return JSON.parse(stored.value);
    } catch (err) {
      logger.warn({ err }, "Failed to parse project summary metadata");
    }
  }
  return null;
}
// Value of the running reference counter at which rebuildWorkspaceIndex stops
// inserting references found by its heuristic (regex) scan.
const FALLBACK_REFERENCE_SAMPLE_LIMIT = 2000;
/**
 * Converts a parser-specific definition record into the index's symbol shape.
 * Line and column are taken from the first positive finite value among
 * `definition.<field>`, `definition.start.<field>` and
 * `definition.loc.start.<field>`, truncated to an integer.
 *
 * @param {object} definition raw definition from a parser/analyser
 * @param {object} context
 * @param {string} [context.engine] recorded as `metadata.engine` when truthy
 * @param {string} [context.relativePath] recorded as `metadata.filePath` unless already set
 * @returns {{name: string, kind: (string|null), line: number, column: number,
 *   metadata: (object|null)}|null} null when the name is missing/blank or no
 *   usable line exists; column defaults to 1
 */
function normaliseDefinition(definition, { engine, relativePath }) {
  if (!definition || typeof definition.name !== "string") return null;
  const name = definition.name.trim();
  if (!name) return null;

  // First candidate that is finite and > 0 wins, then gets truncated
  // (which may yield 0 for values below 1).
  const pickPosition = (field) => {
    const candidates = [
      definition[field],
      definition.start?.[field],
      definition.loc?.start?.[field],
    ];
    const chosen = candidates.find((value) => Number.isFinite(value) && value > 0);
    return chosen === undefined ? null : Math.trunc(chosen);
  };

  const line = pickPosition("line");
  if (!line) return null;

  const rawColumn = pickPosition("column") ?? 1;
  const column = rawColumn <= 0 ? 1 : rawColumn;

  const hasMetadataObject =
    definition.metadata !== null && typeof definition.metadata === "object";
  const metadata = hasMetadataObject ? { ...definition.metadata } : {};
  if (engine) {
    metadata.engine = engine;
  }
  if (relativePath && !metadata.filePath) {
    metadata.filePath = relativePath;
  }
  const hasMetadata = Object.keys(metadata).length > 0;

  return {
    name,
    kind: definition.kind ?? definition.type ?? null,
    line,
    column,
    metadata: hasMetadata ? metadata : null,
  };
}
/**
 * Converts a parser-specific dependency record into the index's edge shape.
 *
 * @param {object} dep raw dependency; `dep.path` is the target
 * @param {object} context
 * @param {string} [context.engine] recorded as `metadata.engine` when truthy
 * @param {string} [context.fromPath] source file; also recorded as
 *   `metadata.fromPath` unless already set
 * @returns {{from_path: (string|null), to_path: string, kind: string,
 *   metadata: (object|null)}|null} null when the target path is missing or blank
 */
function normaliseDependency(dep, { engine, fromPath }) {
  const target = typeof dep?.path === "string" ? dep.path.trim() : "";
  if (target === "") return null;

  const hasMetadataObject = dep.metadata !== null && typeof dep.metadata === "object";
  const details = hasMetadataObject ? { ...dep.metadata } : {};

  if (dep.clause && !details.clause) {
    details.clause = dep.clause;
  }
  // A non-zero finite position on the record overrides any value from metadata.
  for (const axis of ["line", "column"]) {
    const position = dep[axis];
    if (position && Number.isFinite(position)) {
      details[axis] = Math.trunc(position);
    }
  }
  if (engine) {
    details.engine = engine;
  }
  if (fromPath && !details.fromPath) {
    details.fromPath = fromPath;
  }

  return {
    from_path: fromPath ?? null,
    to_path: target,
    kind: dep.kind ?? dep.type ?? "reference",
    metadata: Object.keys(details).length > 0 ? details : null,
  };
}
/**
 * Rebuilds the workspace index from scratch.
 *
 * Steps, in order:
 *  1. Enumerate files under the workspace root (dotfiles included, symlinks not followed).
 *  2. For each file, record size/mtime/language and, for files with a known language that
 *     are no larger than MAX_SYMBOL_FILE_SIZE, extract symbols and dependencies using
 *     analyzeFile, then parseFile, then the regex-based extractSymbols as successive fallbacks.
 *  3. In one transaction: clear the index tables, insert files, symbols, dependencies and
 *     framework signals, and store the project summary.
 *  4. In a second transaction: insert symbol references.
 *  5. Refresh CLAUDE.md (failures are logged, not thrown).
 *
 * @param {object} [options]
 * @param {string|string[]} [options.patterns] globs to index (see normalisePatterns)
 * @param {string|string[]} [options.ignore] extra ignore globs (see normaliseIgnore)
 * @returns {Promise<object>} the project summary built by buildProjectSummary
 */
async function rebuildWorkspaceIndex(options = {}) {
const patterns = normalisePatterns(options.patterns);
const ignore = normaliseIgnore(options.ignore);
logger.info(
{
workspaceRoot,
patterns,
ignore,
},
"Rebuilding workspace index",
);
const entries = await fg(patterns, {
cwd: workspaceRoot,
ignore,
onlyFiles: true,
dot: true,
followSymbolicLinks: false,
unique: true,
});
const fileInfos = [];
// Content of every file that was read for analysis, keyed by relative path.
const fileContents = new Map();
// analyzeFile results, keyed by relative path; used later to resolve references.
const navigationData = new Map();
// Running count of inserted references (shared by both reference passes below).
let referenceCount = 0;
for (const relativePath of entries) {
try {
const absolute = resolveWorkspacePath(relativePath);
const stats = await fsp.stat(absolute);
if (!stats.isFile()) continue;
const language = inferLanguage(relativePath);
const info = {
path: relativePath,
size_bytes: stats.size,
mtime_ms: stats.mtimeMs,
language,
summary: null,
symbols: [],
dependencies: [],
};
// Only the root package.json is parsed; detectFrameworks reads info.packageJson.
if (relativePath === "package.json") {
try {
const pkgRaw = await fsp.readFile(absolute, "utf8");
info.packageJson = JSON.parse(pkgRaw);
} catch (err) {
logger.warn({ err }, "Failed to parse package.json for framework detection");
}
}
if (language && stats.size <= MAX_SYMBOL_FILE_SIZE) {
try {
const content = await fsp.readFile(absolute, "utf8");
fileContents.set(relativePath, content);
// Tier 1: structured navigation analysis. A failure here is non-fatal.
let navResult = null;
try {
navResult = analyzeFile({
relativePath,
content,
language,
});
} catch (analysisErr) {
logger.debug(
{ err: analysisErr, relativePath, language },
"Structured navigation analysis failed",
);
}
if (navResult) {
navigationData.set(relativePath, navResult);
// Prefer `definitions`; fall back to `symbols` when definitions are absent or empty.
const definitions =
(Array.isArray(navResult.definitions) && navResult.definitions.length
? navResult.definitions
: Array.isArray(navResult.symbols)
? navResult.symbols
: []
).map((definition) =>
normaliseDefinition(definition, {
engine: navResult.engine,
relativePath,
}),
).filter(Boolean);
if (definitions.length) {
info.symbols = definitions;
}
const deps =
Array.isArray(navResult.dependencies) && navResult.dependencies.length
? navResult.dependencies
.map((dep) =>
normaliseDependency(dep, {
engine: navResult.engine,
fromPath: relativePath,
}),
)
.filter(Boolean)
: [];
if (deps.length) {
info.dependencies = deps;
}
}
// Tier 2: parseFile, used only when tier 1 produced no symbols.
if (!info.symbols.length) {
const parsed = parseFile(relativePath, content, language);
if (parsed) {
const definitions =
(Array.isArray(parsed.definitions) && parsed.definitions.length
? parsed.definitions
: Array.isArray(parsed.symbols)
? parsed.symbols
: []
).map((definition) =>
normaliseDefinition(definition, {
engine: parsed.engine,
relativePath,
}),
).filter(Boolean);
if (definitions.length) {
info.symbols = definitions;
} else if (Array.isArray(parsed.symbols) && parsed.symbols.length) {
// No symbol survived normalisation: keep the parser's raw symbols as-is.
info.symbols = parsed.symbols;
}
const deps =
Array.isArray(parsed.dependencies) && parsed.dependencies.length
? parsed.dependencies
.map((dep) =>
normaliseDependency(dep, {
engine: parsed.engine,
fromPath: relativePath,
}),
)
.filter(Boolean)
: [];
// Note: this replaces any dependencies collected from the navigation result above.
if (deps.length) {
info.dependencies = deps;
}
}
}
// Tier 3: regex heuristics.
if (!info.symbols.length) {
info.symbols = extractSymbols(relativePath, content, language);
}
} catch (err) {
logger.debug({ err, relativePath }, "Failed to extract symbols/dependencies for file");
}
}
fileInfos.push(info);
} catch (err) {
logger.warn({ err, relativePath }, "Failed to index file");
}
}
const frameworks = detectFrameworks(fileInfos);
// Flatten per-file dependencies into plain edge records, dropping edges without a target.
const dependenciesRaw = fileInfos.flatMap((info) => {
if (!Array.isArray(info.dependencies)) return [];
return info.dependencies
.filter((dep) => typeof dep?.to_path === "string" && dep.to_path.trim().length > 0)
.map((dep) => ({
from_path: dep.from_path ?? info.path,
to_path: dep.to_path,
kind: dep.kind ?? "reference",
metadata: dep.metadata ?? null,
}));
});
const dependenciesByFile = dependenciesRaw.reduce((acc, dep) => {
const list = acc.get(dep.from_path) ?? [];
list.push(dep);
acc.set(dep.from_path, list);
return acc;
}, new Map());
const summary = buildProjectSummary({ fileInfos, frameworks, dependenciesRaw });
// Row ids of inserted symbols, needed to attach references in the second transaction.
const symbolIdRecords = [];
const initialInsertTx = db.transaction(() => {
clearFilesStmt.run();
clearSymbolsStmt.run();
clearSymbolRefsStmt.run();
clearFrameworkStmt.run();
clearDependenciesStmt.run();
for (const info of fileInfos) {
// Work on a copy and strip the fields that are not bound by insertFileStmt.
const record = { ...info };
delete record.packageJson;
const symbols = record.symbols ?? [];
const dependencies = dependenciesByFile.get(record.path) ?? [];
const content = fileContents.get(record.path);
delete record.symbols;
delete record.dependencies;
insertFileStmt.run(record);
if (symbols.length) {
symbols.forEach((symbol) => {
const result = insertSymbolStmt.run({
file_path: record.path,
name: symbol.name,
kind: symbol.kind,
line: symbol.line,
column: symbol.column ?? 1,
metadata: symbol.metadata ? JSON.stringify(symbol.metadata) : null,
});
symbolIdRecords.push({
id: result.lastInsertRowid,
name: symbol.name,
filePath: record.path,
line: symbol.line,
column: symbol.column ?? 1,
language: record.language,
});
});
}
// NOTE(review): this stores the value that was just read from the same key, so it
// looks like a no-op — confirm and consider removing.
if (content !== undefined) {
fileContents.set(record.path, content);
}
if (dependencies.length) {
dependencies.forEach((dep) => {
insertDependencyStmt.run({
from_path: dep.from_path,
to_path: dep.to_path,
kind: dep.kind,
metadata: dep.metadata ? JSON.stringify(dep.metadata) : null,
});
});
}
}
for (const signal of frameworks.signals) {
insertFrameworkStmt.run(signal);
}
storeProjectSummary(summary);
});
initialInsertTx();
// Index inserted symbols by name so references can be matched to definitions.
const defsByName = symbolIdRecords.reduce((acc, record) => {
const list = acc.get(record.name) ?? [];
list.push(record);
acc.set(record.name, list);
return acc;
}, new Map());
const referenceTx = db.transaction(() => {
// Pass 1: references reported by analyzeFile.
navigationData.forEach((nav, filePath) => {
const references = Array.isArray(nav.references) ? nav.references : [];
const definitions = Array.isArray(nav.definitions) ? nav.definitions : [];
// De-duplicate references by name/line/column; a later duplicate replaces an earlier one.
const referenceMap = new Map();
references.forEach((ref) => {
if (!ref || typeof ref.name !== "string") return;
const name = ref.name.trim();
if (!name) return;
const line = Number.isFinite(ref.line) ? Math.trunc(ref.line) : null;
const column = Number.isFinite(ref.column) ? Math.trunc(ref.column) : null;
if (!line || line <= 0) return;
const key = `${name}:${line}:${column ?? 0}`;
referenceMap.set(key, {
name,
line,
column: column ?? null,
snippet: typeof ref.snippet === "string" ? ref.snippet : null,
metadata:
typeof ref.metadata === "object" && ref.metadata !== null
? { ...ref.metadata }
: {},
});
});
// Only names that this file itself defines are linked; each matching reference is
// attached to every indexed definition of that name (in any file).
// NOTE(review): if nav.definitions lists the same name more than once, the same
// references are inserted once per listing — confirm whether that is intended.
definitions.forEach((definition) => {
if (!definition || typeof definition.name !== "string") return;
const name = definition.name.trim();
if (!name) return;
const defs = defsByName.get(name);
if (!defs || !defs.length) return;
referenceMap.forEach((ref) => {
if (ref.name !== name) return;
defs.forEach((def) => {
// Skip the definition site itself.
if (def.filePath === filePath && def.line === ref.line && def.column === ref.column) {
return;
}
const metadata = {
engine: nav.engine ?? "tree_sitter",
language: def.language ?? null,
...ref.metadata,
};
insertSymbolReferenceStmt.run({
symbol_id: def.id,
file_path: filePath,
line: ref.line,
column: ref.column ?? null,
snippet: ref.snippet,
metadata: JSON.stringify(metadata),
});
referenceCount += 1;
});
});
});
});
// Pass 2: heuristic word-boundary scan of files that have no navigation data.
// It stops once referenceCount reaches FALLBACK_REFERENCE_SAMPLE_LIMIT; the counter
// also includes the references inserted in pass 1.
fileContents.forEach((content, filePath) => {
if (navigationData.has(filePath)) return;
if (typeof content !== "string" || content.length === 0) return;
const lines = content.split(/\r?\n/);
defsByName.forEach((defs, symbolName) => {
const regex = getCachedRegex(symbolName);
// `some` is used for early exit: returning true stops the scan of this file's lines.
lines.some((line, lineIndex) => {
if (referenceCount >= FALLBACK_REFERENCE_SAMPLE_LIMIT) {
return true;
}
let match;
// NOTE(review): the regex is a shared global (/g) instance; leaving this loop early
// via `return true` keeps its lastIndex set — verify getCachedRegex callers cope.
while ((match = regex.exec(line)) !== null) {
const column = match.index + 1;
const snippet = line.trim();
defs.forEach((def) => {
if (
def.filePath === filePath &&
def.line === lineIndex + 1 &&
def.column === column
) {
return;
}
if (referenceCount >= FALLBACK_REFERENCE_SAMPLE_LIMIT) return;
insertSymbolReferenceStmt.run({
symbol_id: def.id,
file_path: filePath,
line: lineIndex + 1,
column,
snippet,
metadata: JSON.stringify({
language: def.language,
engine: "heuristic",
}),
});
referenceCount += 1;
});
if (referenceCount >= FALLBACK_REFERENCE_SAMPLE_LIMIT) {
return true;
}
}
return referenceCount >= FALLBACK_REFERENCE_SAMPLE_LIMIT;
});
});
});
});
referenceTx();
logger.info(
{
fileCount: fileInfos.length,
frameworks: summary.frameworks,
languages: summary.languages,
},
"Workspace index rebuild complete",
);
// Best effort: a failure to write CLAUDE.md must not fail the rebuild.
try {
await ensureClaudeDoc(summary);
} catch (err) {
logger.warn({ err }, "Failed to update CLAUDE.md");
}
return summary;
}
/**
 * Return the stored project summary, or an empty placeholder when there is none.
 *
 * @returns {object} The value produced by `readProjectSummary()` when it is truthy;
 *   otherwise a zeroed summary for `workspaceRoot` whose `message` tells the caller
 *   to run workspace_index_rebuild.
 */
function getProjectSummary() {
  const stored = readProjectSummary();
  if (!stored) {
    // Nothing usable was read back: report an empty index instead of null so
    // callers always receive an object with the same top-level fields.
    return {
      indexedAt: null,
      workspaceRoot,
      fileCount: 0,
      languages: [],
      frameworks: [],
      frameworkSignals: [],
      message: "No project summary found. Run workspace_index_rebuild to generate one.",
    };
  }
  return stored;
}
/**
 * Pure-Node text search over the workspace, used when ripgrep is unavailable.
 *
 * Files are enumerated with fast-glob under `workspaceRoot` and read one at a
 * time through `readFileExcerpt`; files for which it returns nothing are skipped.
 *
 * @param {object} args
 * @param {string} args.query - Literal text, or a regular-expression source when `regex` is true.
 * @param {boolean} args.regex - Treat `query` as a regular expression.
 * @param {number} args.limit - Maximum number of matches to collect.
 * @param {string[]} args.ignore - Glob patterns handed to fast-glob's `ignore` option.
 * @returns {Promise<Array<{path: string, line: number, column: (number|null), match: string}>>}
 *   One entry per matching line. `line` is 1-based; `column` is the 0-based
 *   `indexOf` position for literal searches and `null` for regex searches.
 * @throws {SyntaxError} When `regex` is true and `query` is not a valid pattern.
 */
async function fallbackSearch({ query, regex, limit, ignore }) {
  const entries = await fg(["**/*"], {
    cwd: workspaceRoot,
    ignore,
    onlyFiles: true,
    unique: true,
    followSymbolicLinks: false,
  });
  // No `g` flag: a global RegExp carries `lastIndex` from one `.test()` call to
  // the next, which is only a hazard when all we need is a per-line yes/no.
  const matcher = regex ? new RegExp(query) : null;
  const matches = [];
  for (const entry of entries) {
    if (matches.length >= limit) break;
    const content = await readFileExcerpt(entry);
    if (!content) continue;
    // Accept CRLF as well as LF so a trailing "\r" neither defeats `$`-anchored
    // patterns nor leaks into the reported `match` text.
    const lines = content.split(/\r?\n/);
    for (let i = 0; i < lines.length; i += 1) {
      if (matches.length >= limit) break;
      const line = lines[i];
      if (matcher) {
        if (matcher.test(line)) {
          matches.push({ path: entry, line: i + 1, column: null, match: line });
        }
        continue;
      }
      const index = line.indexOf(query);
      if (index !== -1) {
        matches.push({ path: entry, line: i + 1, column: index, match: line });
      }
    }
  }
  return matches;
}
/**
 * Search the workspace for text, trying ripgrep first and the Node fallback second.
 *
 * @param {object} [options]
 * @param {string} [options.query] - Search text; `term` and `pattern` are accepted as aliases.
 * @param {boolean} [options.regex] - Treat the query as a regular expression (`isRegex` is an alias).
 * @param {number} [options.limit] - Positive integer, capped at MAX_RESULTS; anything else means 50.
 * @param {*} [options.ignore] - Passed through `normaliseIgnore`.
 * @returns {Promise<{engine: string, query: string, regex: boolean, limit: number, matches: Array}>}
 *   `engine` is "ripgrep" or "fallback" depending on which search produced `matches`.
 * @throws {Error} When no non-empty string query is supplied.
 */
async function searchWorkspace(options = {}) {
  const rawQuery = options.query ?? options.term ?? options.pattern;
  if (typeof rawQuery !== "string" || rawQuery.trim().length === 0) {
    throw new Error("Search query must be a non-empty string.");
  }

  let cappedLimit = 50;
  if (Number.isInteger(options.limit) && options.limit > 0) {
    cappedLimit = Math.min(options.limit, MAX_RESULTS);
  }
  const request = {
    query: rawQuery.trim(),
    regex: options.regex === true || options.isRegex === true,
    limit: cappedLimit,
    ignore: normaliseIgnore(options.ignore),
  };

  let engine = "ripgrep";
  let matches;
  try {
    // Each engine receives its own copy of the request object.
    matches = await searchWithRipgrep({ ...request });
  } catch (err) {
    // Any ripgrep failure is treated as "use the slower in-process search".
    logger.warn({ err }, "ripgrep search failed, falling back to Node search");
    engine = "fallback";
    matches = await fallbackSearch({ ...request });
  }

  return {
    engine,
    query: request.query,
    regex: request.regex,
    limit: request.limit,
    matches,
  };
}
/**
 * Look up indexed symbols whose name contains the query text.
 *
 * @param {object} [options]
 * @param {string} [options.query] - Name fragment; `name` and `symbol` are accepted as aliases.
 * @param {number} [options.limit] - Positive integer, capped at MAX_RESULTS; anything else means 50.
 * @param {string} [options.language] - Restrict to files of this language (lower-cased before comparing).
 * @param {string} [options.path] - Restrict to one file path (`file` is an alias).
 * @returns {Array<{filePath: string, name: string, kind: string, line: number,
 *   column: (number|null), language: (string|null), metadata: (object|null)}>}
 *   Rows ordered by name, then line.
 * @throws {Error} When no non-empty string query is supplied.
 */
function searchSymbols(options = {}) {
  const query = options.query ?? options.name ?? options.symbol;
  if (typeof query !== "string" || query.trim().length === 0) {
    throw new Error("Symbol query must be a non-empty string.");
  }
  const trimmedQuery = query.trim();
  const limit =
    Number.isInteger(options.limit) && options.limit > 0
      ? Math.min(options.limit, MAX_RESULTS)
      : 50;
  const language = options.language ? String(options.language).toLowerCase() : null;
  const filePath =
    typeof options.path === "string"
      ? options.path
      : typeof options.file === "string"
        ? options.file
        : null;

  // NOTE: the query is embedded in a LIKE pattern as-is, so any `%` or `_` it
  // contains acts as a wildcard rather than a literal character.
  let sql = `SELECT s.file_path,
       s.name,
       s.kind,
       s.line,
       s.column,
       s.metadata,
       f.language
  FROM symbols s
  LEFT JOIN files f ON s.file_path = f.path
  WHERE s.name LIKE ?`;
  const params = [`%${trimmedQuery}%`];
  if (language) {
    sql += " AND (f.language = ? OR LOWER(f.language) = ?)";
    params.push(language, language);
  }
  if (filePath) {
    sql += " AND s.file_path = ?";
    params.push(filePath);
  }
  sql += " ORDER BY s.name ASC, s.line ASC LIMIT ?";
  params.push(limit);

  const rows = db.prepare(sql).all(...params);
  return rows.map((row) => ({
    filePath: row.file_path,
    name: row.name,
    kind: row.kind,
    line: row.line,
    column: row.column ?? null,
    language: row.language ?? null,
    // Parse defensively, like the other lookups in this file: one row with
    // malformed metadata should not make the whole search throw.
    metadata: row.metadata ? safeParseJson(row.metadata, null) : null,
  }));
}
/**
 * Find recorded references to symbols whose name contains the query text.
 *
 * @param {object} [options]
 * @param {string} [options.symbol] - Name fragment; `name` and `query` are accepted as aliases.
 * @param {number} [options.limit] - Positive integer, capped at MAX_RESULTS; anything else means 100.
 * @param {string} [options.path] - Only return references located in this file (`file` is an alias).
 * @returns {Array<object>} One entry per reference with `symbol`, `kind`, `definition`,
 *   `reference`, `language`, `engine` and `metadata: { symbol, reference }`. Entries are
 *   ordered by engine ("tree_sitter" first, unknown engines last), then by reference
 *   file path and line.
 * @throws {Error} When no non-empty string query is supplied.
 */
function searchSymbolReferences(options = {}) {
  const query = options.symbol ?? options.name ?? options.query;
  if (typeof query !== "string" || query.trim().length === 0) {
    throw new Error("Symbol reference query must be a non-empty string.");
  }
  const trimmedQuery = query.trim();
  const limit =
    Number.isInteger(options.limit) && options.limit > 0
      ? Math.min(options.limit, MAX_RESULTS)
      : 100;
  const filePath =
    typeof options.path === "string"
      ? options.path
      : typeof options.file === "string"
        ? options.file
        : undefined;

  // Both metadata columns are aliased because the row mapper below reads
  // `symbol_metadata` and `reference_metadata`; without the aliases `engine`
  // is always null and the engine-priority sort has nothing to work with.
  let sql = `SELECT s.name,
       s.kind,
       s.file_path AS definition_path,
       s.line AS definition_line,
       s.column AS definition_column,
       s.metadata AS symbol_metadata,
       r.file_path,
       r.line,
       r.column,
       r.snippet,
       r.metadata AS reference_metadata,
       f.language
  FROM symbol_references r
  JOIN symbols s ON r.symbol_id = s.id
  LEFT JOIN files f ON s.file_path = f.path
  WHERE s.name LIKE ?`;
  const params = [`%${trimmedQuery}%`];
  if (filePath) {
    sql += " AND r.file_path = ?";
    params.push(filePath);
  }
  sql += " ORDER BY s.name ASC, r.file_path ASC, r.line ASC LIMIT ?";
  params.push(limit);

  const rows = db.prepare(sql).all(...params);
  const parsed = rows.map((row) => {
    const symbolMetadata = row.symbol_metadata ? safeParseJson(row.symbol_metadata, null) : null;
    const referenceMetadata = row.reference_metadata
      ? safeParseJson(row.reference_metadata, null)
      : null;
    // The reference's own engine wins; the symbol's engine is only a fallback.
    const engine = referenceMetadata?.engine ?? symbolMetadata?.engine ?? null;
    return {
      symbol: row.name,
      kind: row.kind,
      definition: {
        filePath: row.definition_path,
        line: row.definition_line,
        column: row.definition_column,
      },
      reference: {
        filePath: row.file_path,
        line: row.line,
        column: row.column ?? null,
        snippet: row.snippet ?? null,
      },
      language: row.language ?? null,
      engine,
      metadata: {
        symbol: symbolMetadata,
        reference: referenceMetadata,
      },
    };
  });

  // Lower number sorts first: tree_sitter, then any other named engine,
  // then heuristic matches, then entries with no engine recorded.
  const enginePriority = (engine) => {
    if (!engine) return 10;
    if (engine === "tree_sitter") return 0;
    return engine === "heuristic" ? 5 : 3;
  };
  parsed.sort((a, b) => {
    const diff = enginePriority(a.engine) - enginePriority(b.engine);
    if (diff !== 0) return diff;
    if (a.reference.filePath !== b.reference.filePath) {
      return a.reference.filePath.localeCompare(b.reference.filePath);
    }
    return (a.reference.line ?? 0) - (b.reference.line ?? 0);
  });
  return parsed;
}
/**
 * Convert one go-to-definition query row into the nested result shape.
 *
 * @param {object} row - Row carrying `name`, `kind`, `definition_*`, `reference_*`,
 *   `snippet` and a JSON `metadata` string.
 * @returns {{symbol: *, kind: *, definition: object, reference: object,
 *   engine: (string|null), metadata: object}} `metadata` is the parsed reference
 *   metadata (an empty object when the row has none) and `engine` is its `engine` field.
 */
function formatGotoDefinitionResult(row) {
  let parsedMetadata = {};
  if (row.metadata) {
    parsedMetadata = safeParseJson(row.metadata, {});
  }

  const definition = {
    filePath: row.definition_path,
    line: row.definition_line,
    column: row.definition_column,
  };
  const reference = {
    filePath: row.reference_path,
    line: row.reference_line,
    column: row.reference_column ?? null,
    snippet: row.snippet ?? null,
  };

  return {
    symbol: row.name,
    kind: row.kind,
    definition,
    reference,
    engine: parsedMetadata.engine ?? null,
    metadata: parsedMetadata,
  };
}
/**
 * Resolve the definition(s) behind the reference at a file location.
 *
 * Runs `selectDefinitionByLocationStmt` with the given position. When a column
 * was supplied and nothing matched, the lookup is repeated once with the column
 * set to null and that result is returned as-is.
 *
 * @param {object} args
 * @param {string} args.filePath - File containing the reference.
 * @param {number} args.line - Positive integer line number.
 * @param {number} [args.column] - Positive integer column; any other value is sent as null.
 * @param {number} [args.limit=10] - Positive integer, capped at 20; anything else means 10.
 * @returns {Array<object>} Rows formatted by `formatGotoDefinitionResult`. For the
 *   first lookup, only "tree_sitter" results are returned when any exist.
 * @throws {Error} When `filePath` is empty or `line` is not a positive integer.
 */
function findDefinitionNearLocation({ filePath, line, column, limit = 10 }) {
  if (typeof filePath !== "string" || filePath.trim().length === 0) {
    throw new Error("go to definition requires a file path.");
  }
  if (!Number.isInteger(line) || line <= 0) {
    throw new Error("go to definition requires a positive line number.");
  }
  const params = {
    filePath,
    line,
    column: Number.isInteger(column) && column > 0 ? column : null,
    // Validate before capping: Math.min alone lets negative or non-numeric
    // values through, and SQLite reads a negative LIMIT as "no limit".
    limit: Number.isInteger(limit) && limit > 0 ? Math.min(limit, 20) : 10,
  };
  const rows = selectDefinitionByLocationStmt.all(params).map(formatGotoDefinitionResult);
  // Retry only when a real column was sent; if it was already null the second
  // query would be identical to the first.
  if (!rows.length && params.column !== null) {
    return selectDefinitionByLocationStmt
      .all({ ...params, column: null })
      .map(formatGotoDefinitionResult);
  }
  const preferred = rows.filter((row) => row.engine === "tree_sitter");
  return preferred.length ? preferred : rows;
}
/**
 * List indexed definitions for a symbol name, "tree_sitter" results first.
 *
 * @param {object} args
 * @param {string} args.name - Symbol name; surrounding whitespace is removed before querying.
 * @param {number} [args.limit=50] - Positive integer, capped at 200; anything else means 50.
 * @returns {Array<{name: string, kind: string, filePath: string, line: number,
 *   column: (number|null), metadata: (object|null)}>} Rows from
 *   `selectDefinitionsBySymbolStmt`, reordered so entries whose metadata engine is
 *   "tree_sitter" come before all others.
 * @throws {Error} When `name` is not a non-empty string.
 */
function listDefinitionsBySymbol({ name, limit = 50 }) {
  if (typeof name !== "string" || name.trim().length === 0) {
    throw new Error("Definition query requires a symbol name.");
  }
  const rows = selectDefinitionsBySymbolStmt.all({
    // Query with the same trimmed value that passed validation, as the other
    // symbol lookups in this file do.
    name: name.trim(),
    // Validate before capping: Math.min alone lets negative or non-numeric
    // values through, and SQLite reads a negative LIMIT as "no limit".
    limit: Number.isInteger(limit) && limit > 0 ? Math.min(limit, 200) : 50,
  });
  // 0 for tree_sitter, 1 for everything else; relies on Array#sort being stable
  // to keep the statement's own ordering inside each group.
  const engineRank = (entry) => (entry.metadata?.engine === "tree_sitter" ? 0 : 1);
  return rows
    .map((row) => ({
      name: row.name,
      kind: row.kind,
      filePath: row.file_path,
      line: row.line,
      column: row.column ?? null,
      metadata: safeParseJson(row.metadata, null),
    }))
    .sort((a, b) => engineRank(a) - engineRank(b));
}
// Functions this module makes available to callers via require().
module.exports = {
listWorkspaceFiles,
searchWorkspace,
rebuildWorkspaceIndex,
getProjectSummary,
searchSymbols,
searchSymbolReferences,
findDefinitionNearLocation,
listDefinitionsBySymbol,
};