termcode

Superior terminal AI coding agent with enterprise-grade security, intelligent error recovery, performance monitoring, and a plugin system. An advanced Claude Code alternative.

// Incremental codebase indexer: scans the repo, hashes files, chunks the
// files that changed, and (when an embedding provider is available) stores
// embedded chunks in .termcode-index.json.
import fg from "fast-glob";
import { promises as fs } from "node:fs";
import { createHash } from "node:crypto";
import path from "node:path";
import { getProvider } from "../providers/index.js";
import { loadConfig } from "../state/config.js";
import { log } from "../util/logging.js";

// File extensions to include in indexing (prioritized by importance)
const HIGH_PRIORITY_EXTENSIONS = new Set([
  '.js', '.jsx', '.ts', '.tsx', '.py', '.go', '.rs', '.java', '.c', '.cpp'
]);
const MEDIUM_PRIORITY_EXTENSIONS = new Set([
  '.h', '.hpp', '.cs', '.php', '.rb', '.swift', '.kt', '.scala', '.sh', '.bash'
]);
const LOW_PRIORITY_EXTENSIONS = new Set([
  '.json', '.yaml', '.yml', '.md', '.txt', '.sql', '.html', '.css', '.scss'
]);
const INDEXABLE_EXTENSIONS = new Set([
  ...HIGH_PRIORITY_EXTENSIONS,
  ...MEDIUM_PRIORITY_EXTENSIONS,
  ...LOW_PRIORITY_EXTENSIONS,
  '.zsh', '.fish', '.ps1', '.bat', '.cmd', '.xml', '.less', '.rst',
  '.graphql', '.proto', '.thrift', '.dockerfile', '.makefile', '.cmake'
]);

// File size limits so huge files are skipped
const MAX_FILE_SIZE = 1024 * 1024;      // 1MB
const MAX_CHUNK_FILE_SIZE = 512 * 1024; // 512KB for chunking

// Queue for background indexing
class IndexingQueue {
  queue = [];
  running = false;
  debounceTimers = new Map();

  async enqueue(repo, priority = 0) {
    return new Promise((resolve, reject) => {
      // Coalesce with any queued request for the same repo, carrying over its
      // resolvers so earlier callers' promises still settle when the work runs.
      const existing = this.queue.find(item => item.repo === repo);
      this.queue = this.queue.filter(item => item.repo !== repo);

      this.queue.push({
        repo,
        priority,
        resolvers: existing ? [...existing.resolvers, resolve] : [resolve],
        rejecters: existing ? [...existing.rejecters, reject] : [reject]
      });
      this.queue.sort((a, b) => b.priority - a.priority);

      if (!this.running) {
        this.processQueue();
      }
    });
  }

  debounce(repo, delay = 5000) {
    return new Promise((resolve, reject) => {
      // A newer request supersedes any pending timer for this repo; settle the
      // superseded promise so its caller is not left hanging.
      const pending = this.debounceTimers.get(repo);
      if (pending) {
        clearTimeout(pending.timer);
        pending.resolve();
      }

      const timer = setTimeout(async () => {
        this.debounceTimers.delete(repo);
        try {
          await this.enqueue(repo, 1);
          resolve();
        } catch (error) {
          reject(error);
        }
      }, delay);
      this.debounceTimers.set(repo, { timer, resolve });
    });
  }

  async processQueue() {
    if (this.running || this.queue.length === 0) return;
    this.running = true;
    while (this.queue.length > 0) {
      const item = this.queue.shift();
      try {
        await buildIndexInternal(item.repo);
        for (const resolve of item.resolvers) resolve();
      } catch (error) {
        for (const reject of item.rejecters) reject(error);
      }
    }
    this.running = false;
  }
}

const indexingQueue = new IndexingQueue();

// md5 is fine here: this is change detection, not a security boundary.
function getFileHash(content) {
  return createHash('md5').update(content).digest('hex');
}

function shouldIncludeFile(filePath, fileStats) {
  const ext = path.extname(filePath).toLowerCase();
  const basename = path.basename(filePath).toLowerCase();

  // Skip dotfiles except .json/.md ones (e.g. .eslintrc.json)
  if (basename.startsWith('.') && !basename.endsWith('.json') && !basename.endsWith('.md')) {
    return false;
  }

  // Skip generated/build files
  const skipPatterns = [
    /\.min\.(js|css)$/,
    /\.bundle\.(js|css)$/,
    /\.chunk\.(js|css)$/,
    /-[a-f0-9]{8,}\.(js|css)$/, // Webpack content hashes
    /\.d\.ts$/,                 // TypeScript declaration files (usually generated)
    /package-lock\.json$/,
    /yarn\.lock$/,
    /pnpm-lock\.yaml$/,
    /composer\.lock$/,
    /Gemfile\.lock$/,
    /poetry\.lock$/
  ];
  if (skipPatterns.some(pattern => pattern.test(basename))) {
    return false;
  }

  // Check file size if available
  if (fileStats && fileStats.size > MAX_FILE_SIZE) {
    return false;
  }

  return INDEXABLE_EXTENSIONS.has(ext) || basename === 'dockerfile' || basename === 'makefile';
}

function getFilePriority(filePath) {
  const ext = path.extname(filePath).toLowerCase();
  const basename = path.basename(filePath).toLowerCase();

  // Configuration files get high priority
  if (['package.json', 'cargo.toml', 'go.mod', 'pyproject.toml', 'requirements.txt'].includes(basename)) {
    return 3;
  }
  if (HIGH_PRIORITY_EXTENSIONS.has(ext)) return 3;
  if (MEDIUM_PRIORITY_EXTENSIONS.has(ext)) return 2;
  if (LOW_PRIORITY_EXTENSIONS.has(ext)) return 1;
  return 0;
}

async function loadExistingIndex(indexPath) {
  try {
    const content = await fs.readFile(indexPath, "utf8");
    const data = JSON.parse(content);
    // Validate structure
    if (!data.metadata || !data.chunks || !Array.isArray(data.chunks)) {
      return null;
    }
    return data;
  } catch {
    return null;
  }
}

async function getModifiedFiles(repo, existingIndex) {
  // Fast glob with optimized patterns
  const files = await fg(["**/*"], {
    cwd: repo,
    dot: false,
    onlyFiles: true,
    stats: true, // Get file stats for size filtering
    ignore: [
      "node_modules/**", ".git/**", "dist/**", "build/**", "target/**",
      "coverage/**", ".next/**", ".nuxt/**", ".vscode/**", ".idea/**",
      "*.log", ".termcode-*",
      // Skip minified bundles and lockfiles
      "**/*.min.js", "**/*.min.css", "**/*.bundle.js", "**/*.chunk.js",
      "**/package-lock.json", "**/yarn.lock", "**/pnpm-lock.yaml"
    ]
  });

  // Filter by extension and size first (very fast), most important files first
  const candidateFiles = files
    .filter(entry => shouldIncludeFile(entry.path, entry.stats))
    .sort((a, b) => getFilePriority(b.path) - getFilePriority(a.path))
    .map(entry => entry.path);

  log.step("Scanning files", `checking ${candidateFiles.length} candidates`);

  // Check hashes in parallel batches
  const modifiedFiles = [];
  const batchSize = 20;
  for (let i = 0; i < candidateFiles.length; i += batchSize) {
    const batch = candidateFiles.slice(i, i + batchSize);
    const batchResults = await Promise.allSettled(batch.map(async (file) => {
      try {
        // Use fs.stat first for a quick size check
        const stats = await fs.stat(path.resolve(repo, file));
        if (stats.size > MAX_FILE_SIZE) {
          return null; // Skip large files
        }
        // Only read content if we need to compare hashes
        if (!existingIndex || !existingIndex.metadata.fileHashes[file]) {
          return file; // New file, definitely modified
        }
        const content = await fs.readFile(path.resolve(repo, file), "utf8");
        const currentHash = getFileHash(content);
        if (existingIndex.metadata.fileHashes[file] !== currentHash) {
          return file; // Modified file
        }
        return null; // Unchanged file
      } catch {
        return null; // File error, skip
      }
    }));

    // Collect successful results
    for (const result of batchResults) {
      if (result.status === 'fulfilled' && result.value) {
        modifiedFiles.push(result.value);
      }
    }
  }
  return modifiedFiles;
}

async function buildIndexInternal(repo, outPath = ".termcode-index.json") {
  const indexPath = path.resolve(repo, outPath);

  // Quick check: if the index exists and is recent, skip entirely
  const existingIndex = await loadExistingIndex(indexPath);
  if (existingIndex) {
    const indexAge = Date.now() - new Date(existingIndex.metadata.lastModified).getTime();
    if (indexAge < 60000) { // Less than 1 minute old
      log.success("Index is recent, skipping");
      return;
    }
  }

  log.step("Building index", "scanning codebase...");
  const modifiedFiles = await getModifiedFiles(repo, existingIndex);
  if (modifiedFiles.length === 0 && existingIndex) {
    log.success("✓ Index up to date");
    return;
  }

  // Smart limit: if too many files changed, only process the most important ones now
  let filesToProcess = modifiedFiles;
  if (modifiedFiles.length > 100) {
    filesToProcess = [...modifiedFiles]
      .sort((a, b) => getFilePriority(b) - getFilePriority(a))
      .slice(0, 100);
    log.step("Prioritizing", `processing ${filesToProcess.length} most important files`);
  } else {
    log.step("Processing files", `${modifiedFiles.length} files to index`);
  }

  const config = await loadConfig();
  if (!config) {
    throw new Error("No configuration found. Please run onboarding first.");
  }

  // Find an embedding provider
  let embedProvider;
  let embedModel;
  // Try the current provider first
  try {
    embedProvider = getProvider(config.defaultProvider);
    embedModel = config.models[config.defaultProvider]?.embed;
    if (!embedModel) throw new Error("No embed model");
    // Probe the embeddings capability with a tiny request
    await embedProvider.embed(["test"], { model: embedModel });
  } catch {
    // Fall back to OpenAI
    try {
      embedProvider = getProvider("openai");
      embedModel = config.models.openai?.embed || "text-embedding-3-small";
      await embedProvider.embed(["test"], { model: embedModel });
    } catch {
      log.warn("No embedding provider available - index will be text-only");
      embedProvider = null;
      embedModel = null;
    }
  }

  // Carry over chunks, dropping only those for files being reprocessed now;
  // deferred files keep their previous chunks until a later pass refreshes them.
  const staleFiles = new Set(filesToProcess);
  const allChunks = existingIndex
    ? existingIndex.chunks.filter(chunk => !staleFiles.has(chunk.file))
    : [];
  const fileHashes = existingIndex ? { ...existingIndex.metadata.fileHashes } : {};

  // Process modified files with adaptive chunking, in parallel batches
  const processingBatchSize = 10;
  let processedCount = 0;

  for (let i = 0; i < filesToProcess.length; i += processingBatchSize) {
    const batch = filesToProcess.slice(i, i + processingBatchSize);
    const batchChunks = await Promise.allSettled(batch.map(async (file) => {
      const full = path.resolve(repo, file);
      let content = "";
      try {
        content = await fs.readFile(full, "utf8");
        fileHashes[file] = getFileHash(content);
      } catch {
        return [];
      }

      // Skip files that are too large for chunking
      if (content.length > MAX_CHUNK_FILE_SIZE) {
        log.warn(`Skipping large file: ${file} (${Math.round(content.length / 1024)}KB)`);
        return [];
      }

      const lines = content.split("\n");
      const chunks = [];

      // Adaptive chunk size based on file type
      let chunkSize = 200; // default lines per chunk
      const ext = path.extname(file).toLowerCase();
      if (['.json', '.yaml', '.yml'].includes(ext)) {
        chunkSize = 100; // Smaller chunks for config files
      } else if (['.md', '.txt'].includes(ext)) {
        chunkSize = 300; // Larger chunks for documentation
      }

      for (let lineStart = 0; lineStart < lines.length; lineStart += chunkSize) {
        const slice = lines.slice(lineStart, lineStart + chunkSize).join("\n");
        if (slice.trim().length < 50) continue; // Skip tiny chunks
        chunks.push({
          file,
          start: lineStart + 1,
          end: Math.min(lineStart + chunkSize, lines.length),
          text: slice,
          embedding: undefined // Filled in below if a provider is available
        });
      }
      return chunks;
    }));

    // Collect chunks from successful results
    const newChunks = [];
    for (const result of batchChunks) {
      if (result.status === 'fulfilled') {
        newChunks.push(...result.value);
      }
    }

    // Embed the batch if a provider is available
    if (embedProvider && embedModel && newChunks.length > 0) {
      try {
        const texts = newChunks.map(chunk => chunk.text);
        const embeddings = await embedProvider.embed(texts, { model: embedModel });
        newChunks.forEach((chunk, idx) => {
          if (embeddings[idx]) {
            chunk.embedding = embeddings[idx];
          }
        });
      } catch {
        log.warn(`Failed to embed batch of ${newChunks.length} chunks`);
      }
    }

    allChunks.push(...newChunks);
    processedCount += batch.length;

    // Progress update
    if (processedCount % 50 === 0 || processedCount === filesToProcess.length) {
      log.step("Processing", `${processedCount}/${filesToProcess.length} files indexed`);
    }
  }

  // Create the updated index
  const indexData = {
    metadata: {
      version: 1,
      createdAt: existingIndex?.metadata.createdAt || new Date().toISOString(),
      lastModified: new Date().toISOString(),
      provider: embedProvider?.id || "none",
      model: embedModel || "none",
      fileCount: Object.keys(fileHashes).length,
      chunkCount: allChunks.length,
      fileHashes
    },
    chunks: allChunks
  };

  await fs.writeFile(indexPath, JSON.stringify(indexData, null, 2), "utf8");
  log.success(`Index updated: ${allChunks.length} chunks from ${Object.keys(fileHashes).length} files`);
}

// Public API
export async function buildIndex(repo, outPath = ".termcode-index.json") {
  return buildIndexInternal(repo, outPath);
}

export async function buildIndexBackground(repo) {
  return indexingQueue.enqueue(repo, 0);
}

export async function rebuildIndexDebounced(repo, delay = 5000) {
  return indexingQueue.debounce(repo, delay);
}

export async function getIndexStats(repo) {
  try {
    const indexPath = path.resolve(repo, ".termcode-index.json");
    const existingIndex = await loadExistingIndex(indexPath);
    return existingIndex?.metadata || null;
  } catch {
    return null;
  }
}
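
For context, a minimal sketch of how the exported API can be driven. Only the exports and the metadata fields come from the file above; the "./indexer.js" import path is an assumption, since the module's real location inside the package is not shown here.

// Hypothetical caller (ESM, top-level await); "./indexer.js" is an assumed path.
import { buildIndex, rebuildIndexDebounced, getIndexStats } from "./indexer.js";

const repo = process.cwd();

// One-shot incremental build: writes .termcode-index.json at the repo root.
await buildIndex(repo);

// Inspect what was indexed via the stored metadata.
const stats = await getIndexStats(repo);
if (stats) {
  console.log(`${stats.chunkCount} chunks from ${stats.fileCount} files (embed model: ${stats.model})`);
}

// On file-save events, coalesce rebuilds behind the 5-second debounce.
await rebuildIndexDebounced(repo, 5000);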