// termcode — Superior terminal AI coding agent with enterprise-grade security,
// intelligent error recovery, performance monitoring, and plugin system —
// Advanced Claude Code alternative.
// (389 lines (388 loc) • 15 kB • JavaScript)
import fg from "fast-glob";
import { promises as fs } from "node:fs";
import { createHash } from "node:crypto";
import path from "node:path";
import { getProvider } from "../providers/index.js";
import { loadConfig } from "../state/config.js";
import { log } from "../util/logging.js";
// File extensions to include in indexing (prioritized by importance).
// Highest-value source files — indexed first.
const HIGH_PRIORITY_EXTENSIONS = new Set([
    ".js", ".jsx", ".ts", ".tsx", ".py", ".go", ".rs", ".java", ".c", ".cpp",
]);
// Secondary source languages and shell scripts.
const MEDIUM_PRIORITY_EXTENSIONS = new Set([
    ".h", ".hpp", ".cs", ".php", ".rb", ".swift", ".kt", ".scala", ".sh", ".bash",
]);
// Config, docs and markup — indexed last.
const LOW_PRIORITY_EXTENSIONS = new Set([
    ".json", ".yaml", ".yml", ".md", ".txt", ".sql", ".html", ".css", ".scss",
]);
// Additional indexable extensions that carry no ordering priority of their own.
const EXTRA_INDEXABLE = [
    ".zsh", ".fish", ".ps1", ".bat", ".cmd", ".xml", ".less", ".rst",
    ".graphql", ".proto", ".thrift", ".dockerfile", ".makefile", ".cmake",
];
// Everything the indexer will consider: the three tiers plus the extras.
const INDEXABLE_EXTENSIONS = new Set([
    ...HIGH_PRIORITY_EXTENSIONS,
    ...MEDIUM_PRIORITY_EXTENSIONS,
    ...LOW_PRIORITY_EXTENSIONS,
    ...EXTRA_INDEXABLE,
]);
// Smart file size limits to skip huge files.
const MAX_FILE_SIZE = 1024 * 1024; // 1 MB hard cap on any indexed file
const MAX_CHUNK_FILE_SIZE = 512 * 1024; // 512 KB cap for chunking
// Queue for background indexing: serializes index builds and coalesces
// duplicate requests for the same repo.
class IndexingQueue {
    // Pending jobs: { repo, priority, resolve, reject }, highest priority first.
    queue = [];
    // True while processQueue() is draining the queue.
    running = false;
    // repo -> { timer, settlers }, used by debounce() to coalesce rapid requests.
    debounceTimers = new Map();
    /**
     * Queue an index build for `repo` and resolve when it completes.
     * If the repo already has a pending entry it is superseded, but the
     * superseded callers' promises are kept and settled together with the
     * new one. (Previously the old entry was silently dropped, so any
     * caller awaiting it hung forever.)
     */
    async enqueue(repo, priority = 0) {
        return new Promise((resolve, reject) => {
            // Pull out any pending entry for the same repo so we never build it twice.
            const superseded = this.queue.filter((item) => item.repo === repo);
            this.queue = this.queue.filter((item) => item.repo !== repo);
            const resolvers = [resolve, ...superseded.map((item) => item.resolve)];
            const rejecters = [reject, ...superseded.map((item) => item.reject)];
            this.queue.push({
                repo,
                priority,
                resolve: (value) => resolvers.forEach((fn) => fn(value)),
                reject: (error) => rejecters.forEach((fn) => fn(error)),
            });
            // Highest priority first.
            this.queue.sort((a, b) => b.priority - a.priority);
            if (!this.running) {
                // Fire-and-forget: results are reported through each item's promise.
                void this.processQueue();
            }
        });
    }
    /**
     * Schedule an elevated-priority build after `delay` ms of quiet;
     * repeated calls for the same repo reset the timer. Earlier callers'
     * promises are kept alive and settle when the rescheduled build runs
     * (previously they were dropped along with the cleared timer).
     */
    debounce(repo, delay = 5000) {
        return new Promise((resolve, reject) => {
            // Reset any pending timer but carry its callers forward.
            const prior = this.debounceTimers.get(repo);
            if (prior) {
                clearTimeout(prior.timer);
            }
            const settlers = prior
                ? [...prior.settlers, { resolve, reject }]
                : [{ resolve, reject }];
            const timer = setTimeout(async () => {
                this.debounceTimers.delete(repo);
                try {
                    await this.enqueue(repo, 1);
                    settlers.forEach((s) => s.resolve());
                }
                catch (error) {
                    settlers.forEach((s) => s.reject(error));
                }
            }, delay);
            this.debounceTimers.set(repo, { timer, settlers });
        });
    }
    /** Drain the queue one repo at a time; safe to call while already running. */
    async processQueue() {
        if (this.running || this.queue.length === 0) {
            return;
        }
        this.running = true;
        while (this.queue.length > 0) {
            const item = this.queue.shift();
            try {
                await buildIndexInternal(item.repo);
                item.resolve();
            }
            catch (error) {
                item.reject(error);
            }
        }
        this.running = false;
    }
}
const indexingQueue = new IndexingQueue();
// Content fingerprint used for change detection (not security): hex MD5.
function getFileHash(content) {
    const hasher = createHash("md5");
    hasher.update(content);
    return hasher.digest("hex");
}
// Decide whether a file is worth indexing based on its name, extension
// and (when stats are available) size. `fileStats` may be undefined.
function shouldIncludeFile(filePath, fileStats) {
    const ext = path.extname(filePath).toLowerCase();
    const basename = path.basename(filePath).toLowerCase();
    // Hidden files are skipped unless they look like config/docs (.json/.md).
    const isHidden = basename.startsWith('.');
    if (isHidden && !(basename.endsWith('.json') || basename.endsWith('.md'))) {
        return false;
    }
    // Generated/build artifacts and lockfiles are never indexed.
    const generatedPatterns = [
        /\.min\.(js|css)$/,
        /\.bundle\.(js|css)$/,
        /\.chunk\.(js|css)$/,
        /-[a-f0-9]{8,}\.(js|css)$/, // Webpack hashes
        /\.d\.ts$/, // TypeScript declaration files (usually generated)
        /package-lock\.json$/,
        /yarn\.lock$/,
        /pnpm-lock\.yaml$/,
        /composer\.lock$/,
        /Gemfile\.lock$/,
        /poetry\.lock$/
    ];
    for (const pattern of generatedPatterns) {
        if (pattern.test(basename)) {
            return false;
        }
    }
    // Respect the global size cap when stats were provided.
    if (fileStats && fileStats.size > MAX_FILE_SIZE) {
        return false;
    }
    if (INDEXABLE_EXTENSIONS.has(ext)) {
        return true;
    }
    // Extension-less special cases worth indexing.
    return basename === 'dockerfile' || basename === 'makefile';
}
// Rank a file for indexing order: 3 = most important, 0 = no priority.
function getFilePriority(filePath) {
    const basename = path.basename(filePath).toLowerCase();
    // Project manifests describe the whole codebase — always top priority.
    const manifests = ['package.json', 'cargo.toml', 'go.mod', 'pyproject.toml', 'requirements.txt'];
    if (manifests.includes(basename)) {
        return 3;
    }
    const ext = path.extname(filePath).toLowerCase();
    const tiers = [
        [HIGH_PRIORITY_EXTENSIONS, 3],
        [MEDIUM_PRIORITY_EXTENSIONS, 2],
        [LOW_PRIORITY_EXTENSIONS, 1],
    ];
    for (const [extensions, priority] of tiers) {
        if (extensions.has(ext)) {
            return priority;
        }
    }
    return 0;
}
// Read and validate a previously written index file.
// Returns the parsed index, or null when the file is missing, unreadable,
// not valid JSON, or missing the expected { metadata, chunks[] } shape.
async function loadExistingIndex(indexPath) {
    try {
        const raw = await fs.readFile(indexPath, "utf8");
        const parsed = JSON.parse(raw);
        const hasValidShape = Boolean(parsed.metadata) && Array.isArray(parsed.chunks);
        return hasValidShape ? parsed : null;
    }
    catch {
        return null;
    }
}
// Enumerate indexable files under `repo` and return those that are new or
// whose content hash differs from `existingIndex` (may be null).
async function getModifiedFiles(repo, existingIndex) {
    // True when `file` is new to the index or its content hash changed.
    const isChanged = async (file) => {
        const abs = path.resolve(repo, file);
        try {
            // Cheap stat check first; oversized files are never indexed.
            const stats = await fs.stat(abs);
            if (stats.size > MAX_FILE_SIZE) {
                return false;
            }
            const knownHash = existingIndex?.metadata.fileHashes[file];
            if (!knownHash) {
                return true; // not in the index yet
            }
            // Only read content when a hash comparison is actually needed.
            const content = await fs.readFile(abs, "utf8");
            return getFileHash(content) !== knownHash;
        }
        catch {
            return false; // unreadable -> treat as unchanged, skip it
        }
    };
    // Fast glob with optimized patterns, skipping VCS/build/dependency dirs.
    const entries = await fg(["**/*"], {
        cwd: repo,
        dot: false,
        onlyFiles: true,
        stats: true, // Get file stats for size filtering
        ignore: [
            "node_modules/**",
            ".git/**",
            "dist/**",
            "build/**",
            "target/**",
            "coverage/**",
            ".next/**",
            ".nuxt/**",
            ".vscode/**",
            ".idea/**",
            "*.log",
            ".termcode-*",
            // Skip common large directories
            "**/*.min.js",
            "**/*.min.css",
            "**/*.bundle.js",
            "**/*.chunk.js",
            "**/package-lock.json",
            "**/yarn.lock",
            "**/pnpm-lock.yaml"
        ]
    });
    // Cheap extension/size filter first, then most important files first.
    const candidates = entries
        .filter((entry) => shouldIncludeFile(entry.path, entry.stats))
        .sort((a, b) => getFilePriority(b.path) - getFilePriority(a.path))
        .map((entry) => entry.path);
    log.step("Scanning files", `checking ${candidates.length} candidates`);
    // Hash-check candidates in parallel batches to bound open file handles.
    const changed = [];
    const batchSize = 20;
    for (let offset = 0; offset < candidates.length; offset += batchSize) {
        const batch = candidates.slice(offset, offset + batchSize);
        const settled = await Promise.allSettled(batch.map(async (file) => ((await isChanged(file)) ? file : null)));
        for (const outcome of settled) {
            if (outcome.status === 'fulfilled' && outcome.value) {
                changed.push(outcome.value);
            }
        }
    }
    return changed;
}
/**
 * Incrementally (re)build the repo's search index at `outPath`.
 * Skips work when the index is under a minute old or nothing changed;
 * caps a single run at the 100 highest-priority modified files; embeds
 * chunks when an embedding provider is reachable, otherwise writes a
 * text-only index.
 */
async function buildIndexInternal(repo, outPath = ".termcode-index.json") {
    const indexPath = path.resolve(repo, outPath);
    // Quick check: a freshly written index (< 1 minute old) is trusted as-is.
    const existingIndex = await loadExistingIndex(indexPath);
    if (existingIndex) {
        const indexAge = Date.now() - new Date(existingIndex.metadata.lastModified).getTime();
        if (indexAge < 60000) { // Less than 1 minute old
            log.success("Index is recent, skipping");
            return;
        }
    }
    log.step("Building index", "scanning codebase...");
    const modifiedFiles = await getModifiedFiles(repo, existingIndex);
    if (modifiedFiles.length === 0 && existingIndex) {
        log.success("✓ Index up to date");
        return;
    }
    // Smart limit: with many changes, only process the most important files now.
    // Deferred files keep their old hashes, so they are picked up next run.
    let filesToProcess = modifiedFiles;
    if (modifiedFiles.length > 100) {
        filesToProcess = modifiedFiles
            .sort((a, b) => getFilePriority(b) - getFilePriority(a))
            .slice(0, 100);
        log.step("Prioritizing", `processing ${filesToProcess.length} most important files`);
    }
    else {
        log.step("Processing files", `${modifiedFiles.length} files to index`);
    }
    const config = await loadConfig();
    if (!config) {
        throw new Error("No configuration found. Please run onboarding first.");
    }
    // Find embedding provider
    let embedProvider;
    let embedModel;
    // Try current provider first
    try {
        embedProvider = getProvider(config.defaultProvider);
        embedModel = config.models[config.defaultProvider]?.embed;
        if (!embedModel)
            throw new Error("No embed model");
        // Probe the embeddings capability with a tiny request.
        await embedProvider.embed(["test"], { model: embedModel });
    }
    catch (e) {
        // Fallback to OpenAI
        try {
            embedProvider = getProvider("openai");
            embedModel = config.models.openai?.embed || "text-embedding-3-small";
            await embedProvider.embed(["test"], { model: embedModel });
        }
        catch (e2) {
            log.warn("No embedding provider available - index will be text-only");
            embedProvider = null;
            embedModel = null;
        }
    }
    // Keep existing chunks, except for the files we are about to re-chunk.
    // BUGFIX: this previously filtered by `modifiedFiles`, so when more than
    // 100 files changed, the modified-but-deferred files lost their chunks
    // without being re-indexed. Filtering by `filesToProcess` (as a Set, to
    // avoid O(n^2) Array.includes scans) keeps their stale chunks until
    // their turn comes on a later run.
    const reindexed = new Set(filesToProcess);
    const allChunks = existingIndex
        ? existingIndex.chunks.filter(chunk => !reindexed.has(chunk.file))
        : [];
    const fileHashes = existingIndex ?
        { ...existingIndex.metadata.fileHashes } : {};
    // Process modified files with smart chunking and parallel batches.
    const processingBatchSize = 10;
    let processedCount = 0;
    for (let i = 0; i < filesToProcess.length; i += processingBatchSize) {
        const batch = filesToProcess.slice(i, i + processingBatchSize);
        const batchChunks = await Promise.allSettled(batch.map(async (file) => {
            const full = path.resolve(repo, file);
            let content = "";
            try {
                content = await fs.readFile(full, "utf8");
                fileHashes[file] = getFileHash(content);
            }
            catch (error) {
                return []; // unreadable file: contribute no chunks
            }
            // Skip files that are too large for chunking
            if (content.length > MAX_CHUNK_FILE_SIZE) {
                log.warn(`Skipping large file: ${file} (${Math.round(content.length / 1024)}KB)`);
                return [];
            }
            const lines = content.split("\n");
            const chunks = [];
            // Adaptive chunk size based on file type and content
            let chunkSize = 200; // default lines per chunk
            const ext = path.extname(file).toLowerCase();
            if (['.json', '.yaml', '.yml'].includes(ext)) {
                chunkSize = 100; // Smaller chunks for config files
            }
            else if (['.md', '.txt'].includes(ext)) {
                chunkSize = 300; // Larger chunks for documentation
            }
            for (let lineStart = 0; lineStart < lines.length; lineStart += chunkSize) {
                const slice = lines.slice(lineStart, lineStart + chunkSize).join("\n");
                if (slice.trim().length < 50)
                    continue; // Skip tiny chunks
                chunks.push({
                    file,
                    start: lineStart + 1, // 1-based line numbers
                    end: Math.min(lineStart + chunkSize, lines.length),
                    text: slice,
                    embedding: undefined // Will add embeddings below if available
                });
            }
            return chunks;
        }));
        // Collect chunks from successful results
        const newChunks = [];
        for (const result of batchChunks) {
            if (result.status === 'fulfilled') {
                newChunks.push(...result.value);
            }
        }
        // Add embeddings in parallel if provider is available
        if (embedProvider && embedModel && newChunks.length > 0) {
            try {
                const texts = newChunks.map(chunk => chunk.text);
                const embeddings = await embedProvider.embed(texts, { model: embedModel });
                newChunks.forEach((chunk, idx) => {
                    if (embeddings[idx]) {
                        chunk.embedding = embeddings[idx];
                    }
                });
            }
            catch (error) {
                // Embedding failures degrade to text-only chunks for this batch.
                log.warn(`Failed to embed batch of ${newChunks.length} chunks`);
            }
        }
        allChunks.push(...newChunks);
        processedCount += batch.length;
        // Progress update
        if (processedCount % 50 === 0 || processedCount === filesToProcess.length) {
            log.step("Processing", `${processedCount}/${filesToProcess.length} files indexed`);
        }
    }
    // Create updated index
    const indexData = {
        metadata: {
            version: 1,
            createdAt: existingIndex?.metadata.createdAt || new Date().toISOString(),
            lastModified: new Date().toISOString(),
            provider: embedProvider?.id || "none",
            model: embedModel || "none",
            fileCount: Object.keys(fileHashes).length,
            chunkCount: allChunks.length,
            fileHashes
        },
        chunks: allChunks
    };
    await fs.writeFile(indexPath, JSON.stringify(indexData, null, 2), "utf8");
    log.success(`Index updated: ${allChunks.length} chunks from ${Object.keys(fileHashes).length} files`);
}
// Public API
/**
 * Build (or incrementally update) the index for `repo`, writing it to
 * `outPath` (default: .termcode-index.json in the repo root). Runs
 * immediately, bypassing the background queue.
 */
export async function buildIndex(repo, outPath = ".termcode-index.json") {
    return buildIndexInternal(repo, outPath);
}
/**
 * Queue a normal-priority index build for `repo` on the shared
 * IndexingQueue; builds for the same repo are coalesced.
 */
export async function buildIndexBackground(repo) {
    return indexingQueue.enqueue(repo, 0);
}
/**
 * Debounced rebuild: waits `delay` ms of quiet (default 5s), then queues
 * an elevated-priority build for `repo`.
 */
export async function rebuildIndexDebounced(repo, delay = 5000) {
    return indexingQueue.debounce(repo, delay);
}
// Return the stored index metadata for `repo`, or null when no valid
// index exists (or any error occurs while loading it).
export async function getIndexStats(repo) {
    try {
        const indexPath = path.resolve(repo, ".termcode-index.json");
        const index = await loadExistingIndex(indexPath);
        return index?.metadata || null;
    }
    catch {
        return null;
    }
}