/*
 * ai-index
 * Version:
 * AI-powered local code indexing and search system for any codebase
 * 543 lines (449 loc) • 15.8 kB
 * JavaScript
 */
import { globby } from 'globby';
import fs from 'fs/promises';
import path from 'path';
import { fileURLToPath } from 'url';
import { loadConfig } from './config.js';
import { createLocalEmbedder } from './local-embedder.js';
import { createLocalVectorStore } from './local-vector-store.js';
import { CodeAnalyzer } from './code-analyzer.js';
import { FileMonitor } from './file-monitor.js';
import { execSync } from 'child_process';
// ES modules do not provide CommonJS-style `__filename` / `__dirname`,
// so both are derived here from this module's own URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
/**
 * Indexes the JavaScript files of a project into a local vector store and
 * keeps in-memory lookup tables (files, symbols, import graph) alongside it.
 *
 * Two indexing strategies are supported: crawl every matching file under the
 * root, or start from explicit entry points and follow relative imports.
 * With `watch` enabled, a FileMonitor re-indexes changed files and removes
 * deleted ones.
 */
export class SmartIndexer {
  /**
   * @param {object} [options]
   * @param {string} [options.rootPath] Project root; defaults to `process.cwd()`.
   * @param {string} [options.indexName] Index name; defaults to the root folder
   *   name with characters outside `[a-zA-Z0-9_-]` replaced by `_`.
   * @param {string[]} [options.entryPoints] Files to start import-following from.
   * @param {boolean} [options.watch] Enable continuous file monitoring.
   */
  constructor(options = {}) {
    this.rootPath = options.rootPath || process.cwd();
    this.indexName = options.indexName || path.basename(this.rootPath).replace(/[^a-zA-Z0-9_-]/g, '_');
    this.entryPoints = options.entryPoints || [];
    this.watchMode = options.watch || false;
    this.analyzer = new CodeAnalyzer();
    this.monitor = null;
    this.embedder = null;
    this.vectorStore = null;
    this.config = null;
    this.symbolIndex = new Map();
    this.fileIndex = new Map();
    this.importGraph = new Map();
  }

  /**
   * Loads configuration, creates the embedder and vector store, and (in watch
   * mode) wires up the file monitor. Must complete before any indexing call.
   */
  async initialize() {
    console.log('š§ Initializing Smart Indexer...');
    this.config = await loadConfig();
    this.embedder = await createLocalEmbedder(this.config);
    this.vectorStore = await createLocalVectorStore(this.config, this.indexName);
    console.log(`š Index: ${this.indexName}`);
    console.log(`š Root: ${this.rootPath}`);
    console.log(`š¢ Embedding dimensions: ${this.embedder.getDimensions()}`);
    if (this.watchMode) {
      this.setupFileMonitor();
    }
  }

  /**
   * Creates the FileMonitor and registers handlers for changed and deleted
   * files. Handlers catch their own errors: they are async listeners, so an
   * escaping rejection would otherwise be unhandled.
   */
  setupFileMonitor() {
    this.monitor = new FileMonitor({
      rootPath: this.rootPath,
      debounceDelay: 2000
    });

    this.monitor.on('files-to-index', async (files) => {
      console.log(`\nš Reindexing ${files.length} changed files...`);
      try {
        for (const file of files) {
          try {
            await this.indexFile(file.fullPath, file.content, file.path);
          } catch (error) {
            // One bad file must not abort the rest of the batch.
            console.error(`Error indexing ${file.path}:`, error.message);
          }
        }
        await this.updateManifest();
        console.log('✅ Reindexing complete');
      } catch (error) {
        console.error('Error updating manifest:', error.message);
      }
    });

    this.monitor.on('files-deleted', async (filePaths) => {
      console.log(`\nšļø Removing ${filePaths.length} deleted files from index...`);
      try {
        for (const filePath of filePaths) {
          try {
            await this.removeFileFromIndex(filePath);
          } catch (error) {
            console.error(`Error removing ${filePath}:`, error.message);
          }
        }
        // Keep the manifest statistics in step with the shrunken index.
        await this.updateManifest();
        console.log('✅ Cleanup complete');
      } catch (error) {
        console.error('Error updating manifest:', error.message);
      }
    });
  }

  /**
   * Initializes the indexer, runs the initial indexing pass and, in watch
   * mode, starts the monitor and installs a SIGINT handler that stops it.
   */
  async start() {
    await this.initialize();
    if (this.entryPoints.length > 0) {
      await this.indexFromEntryPoints();
    } else {
      await this.indexAllFiles();
    }
    if (this.watchMode) {
      console.log('\nšļø Starting continuous monitoring...');
      await this.monitor.start();
      // Keep process running
      process.on('SIGINT', async () => {
        console.log('\nā¹ļø Stopping indexer...');
        await this.monitor.stop();
        process.exit(0);
      });
    }
  }

  /**
   * Indexes the configured entry points and every JavaScript file reachable
   * from them through relative imports (breadth-first).
   *
   * Paths are resolved to absolute form before the visited check so the same
   * file is never processed twice, and each file is stored under its
   * root-relative path, the same key `indexAllFiles` and the file monitor use.
   */
  async indexFromEntryPoints() {
    console.log('\nšÆ Indexing from entry points...');
    const visited = new Set();
    const queue = this.entryPoints.map((entry) => path.resolve(this.rootPath, entry));
    let indexed = 0;

    while (queue.length > 0) {
      const fullPath = queue.shift();
      if (visited.has(fullPath)) continue;
      visited.add(fullPath);

      // Forward slashes keep keys identical across platforms.
      const relativePath = path.relative(this.rootPath, fullPath).split(path.sep).join('/');
      try {
        const content = await fs.readFile(fullPath, 'utf-8');
        const analysis = await this.indexFile(fullPath, content, relativePath);
        if (!analysis) continue;
        indexed++;

        // Add imported files to visit queue (only JavaScript files)
        for (const imp of analysis.imports) {
          if (!imp.source.startsWith('.')) continue;
          const resolvedPath = await this.resolveImport(fullPath, imp.source);
          if (resolvedPath && !visited.has(resolvedPath) && this.isJavaScriptFile(resolvedPath)) {
            queue.push(resolvedPath);
          }
        }
      } catch (error) {
        console.error(`Error indexing ${relativePath}:`, error.message);
      }
    }

    await this.updateManifest();
    console.log(`✅ Indexed ${indexed} files from entry points`);
  }

  /**
   * Indexes every `.js` / `.mjs` file under the root, skipping common build
   * and test artefacts and anything git reports as ignored.
   */
  async indexAllFiles() {
    console.log('\nš Indexing JavaScript files (respecting .gitignore)...');
    // Only JavaScript files for now as requested
    const patterns = ['**/*.{js,mjs}'];
    const ignorePatterns = [
      '**/node_modules/**',
      '**/dist/**',
      '**/build/**',
      '**/*.min.js',
      '**/*.test.js',
      '**/*.spec.js'
    ];
    // Add git-ignored files to ignore patterns
    const gitIgnored = await this.getGitIgnoredFiles();
    ignorePatterns.push(...gitIgnored);

    const files = await globby(patterns, {
      cwd: this.rootPath,
      ignore: ignorePatterns,
      absolute: false,
      gitignore: true
    });
    console.log(`Found ${files.length} JavaScript files to index`);

    let processed = 0;
    for (const file of files) {
      const fullPath = path.join(this.rootPath, file);
      try {
        const content = await fs.readFile(fullPath, 'utf-8');
        await this.indexFile(fullPath, content, file);
        processed++;
        if (processed % 50 === 0) {
          console.log(`Processed ${processed}/${files.length} files...`);
        }
      } catch (error) {
        console.error(`Error indexing ${file}:`, error.message);
      }
    }

    await this.updateManifest();
    console.log(`✅ Indexed ${processed} files successfully`);
  }

  /**
   * Analyzes one file and (re)indexes it under `relativePath`.
   *
   * @param {string} fullPath Path handed to the analyzer.
   * @param {string} content File contents.
   * @param {string} relativePath Key used in the vector store and lookup maps.
   * @returns {Promise<object|null>} The analysis, or null when the analyzer
   *   produced none (nothing is changed in that case).
   */
  async indexFile(fullPath, content, relativePath) {
    const analysis = await this.analyzer.analyzeFile(fullPath, content);
    if (!analysis) {
      return null;
    }

    // Remove old chunks and symbols if file was previously indexed, so
    // symbols deleted from the file do not linger.
    if (this.fileIndex.has(relativePath)) {
      await this.vectorStore.removeDocumentsByFile(relativePath);
      this.removeSymbolsForFile(relativePath);
    }

    await this.indexAnalysis(analysis, content, relativePath);

    this.fileIndex.set(relativePath, {
      symbols: analysis.symbols.map(s => s.name),
      imports: analysis.imports.map(i => i.source),
      exports: analysis.exports.map(e => e.name).filter(n => n),
      complexity: analysis.complexity,
      lastIndexed: new Date().toISOString()
    });
    return analysis;
  }

  /**
   * Turns an analysis into vector-store documents (one per chunk) and records
   * the file's symbols and import/export data in the in-memory maps.
   *
   * @param {object} analysis Result of `CodeAnalyzer#analyzeFile`.
   * @param {string} content File contents (currently unused here).
   * @param {string|null} [relativePath] Index key; falls back to `analysis.filePath`.
   */
  async indexAnalysis(analysis, content, relativePath = null) {
    const filePath = relativePath || analysis.filePath;
    const fileSymbols = analysis.symbols.map(s => s.name);
    const fileImports = analysis.imports.map(i => i.source);
    const fileExports = analysis.exports.map(e => e.name).filter(n => n);
    const documents = [];

    // Create enriched chunks with semantic understanding
    for (const chunk of analysis.chunks) {
      // Tolerate chunks that carry no metadata object.
      const metadata = chunk.metadata || {};
      const embedding = await this.createEnrichedEmbedding(chunk, analysis, filePath);
      documents.push({
        id: `${filePath}:${chunk.startLine}:${chunk.type}`,
        repo_path: filePath,
        content: chunk.content,
        embedding,
        // Enhanced metadata
        chunk_type: chunk.type,
        symbol_name: chunk.symbolName,
        symbol_type: chunk.symbolType,
        start_line: chunk.startLine,
        end_line: chunk.endLine,
        // Code structure metadata
        complexity: metadata.complexity || analysis.complexity,
        async: metadata.async || false,
        params: metadata.params || [],
        methods: metadata.methods || [],
        extends: metadata.extends,
        // Relationship metadata
        imports: chunk.type === 'imports' ? metadata.sources : [],
        exports: chunk.type === 'exports' ? metadata.exportedSymbols : [],
        // File context
        file_symbols: fileSymbols,
        file_imports: fileImports,
        file_exports: fileExports,
        // Usage tracking for better search context
        usages: analysis.usages || [],
        references: analysis.references || [],
        language: analysis.language,
        area: this.inferArea(filePath)
      });
    }

    // Index documents in vector store
    if (documents.length > 0) {
      await this.vectorStore.addDocuments(documents);
    }

    // Update symbol index
    for (const symbol of analysis.symbols) {
      this.symbolIndex.set(`${filePath}:${symbol.name}`, {
        ...symbol,
        filePath,
        references: [],
        callers: [],
        callees: []
      });
    }

    // Update import graph
    this.importGraph.set(filePath, {
      imports: analysis.imports,
      exports: analysis.exports,
      symbols: analysis.symbols
    });
  }

  /**
   * Builds the text that is embedded for a chunk: file context, then the
   * symbol header (if any), then the chunk content, then a "Used by" line for
   * symbol chunks that other indexed files import.
   *
   * @param {object} chunk Chunk from the analysis.
   * @param {object} analysis Analysis the chunk belongs to.
   * @param {string} [filePath] Key of this file in the import graph, used to
   *   exclude the file itself from "Used by"; defaults to `analysis.filePath`.
   * @returns {Promise<*>} Whatever the embedder returns for the text.
   */
  async createEnrichedEmbedding(chunk, analysis, filePath = analysis.filePath) {
    let enrichedText = chunk.content;

    // Add symbol context
    if (chunk.symbolName) {
      enrichedText = `${chunk.symbolType} ${chunk.symbolName}\n${enrichedText}`;
    }

    // Add file context
    const fileContext = `File: ${analysis.filePath} Language: ${analysis.language}`;
    enrichedText = `${fileContext}\n${enrichedText}`;

    // Add relationship context
    if (chunk.type === 'symbol' && chunk.symbolName) {
      const usedBy = this.findSymbolReferences(chunk.symbolName, filePath);
      if (usedBy.length > 0) {
        enrichedText += `\nUsed by: ${usedBy.join(', ')}`;
      }
    }

    return await this.embedder.embed(enrichedText);
  }

  /**
   * Lists indexed files, other than `filePath`, that have an import specifier
   * whose imported or local name equals `symbolName`.
   *
   * @param {string} symbolName
   * @param {string} filePath Import-graph key of the defining file.
   * @returns {string[]} Each referencing file once, in import-graph order.
   */
  findSymbolReferences(symbolName, filePath) {
    const referencingFiles = new Set();
    for (const [file, fileData] of this.importGraph) {
      if (file === filePath) continue;
      for (const imp of fileData.imports) {
        if (!imp.specifiers) continue;
        const matches = imp.specifiers.some(
          spec => spec.imported === symbolName || spec.local === symbolName
        );
        if (matches) {
          referencingFiles.add(file);
        }
      }
    }
    return [...referencingFiles];
  }

  /**
   * Deletes every symbol-index entry recorded for `filePath`.
   * Internal helper shared by re-indexing and file removal.
   *
   * @param {string} filePath Index key of the file.
   */
  removeSymbolsForFile(filePath) {
    for (const [id, symbol] of this.symbolIndex) {
      if (symbol.filePath === filePath) {
        this.symbolIndex.delete(id);
      }
    }
  }

  /**
   * Removes a file's documents from the vector store and drops it from all
   * in-memory maps.
   *
   * @param {string} relativePath Index key of the file.
   */
  async removeFileFromIndex(relativePath) {
    await this.vectorStore.removeDocumentsByFile(relativePath);
    this.fileIndex.delete(relativePath);
    this.importGraph.delete(relativePath);
    this.removeSymbolsForFile(relativePath);
  }

  /**
   * Classifies a file by the directory names in its path.
   *
   * The path is lower-cased, backslashes become forward slashes and a leading
   * slash is added, so top-level directories of relative paths (`api/x.js`)
   * are matched as well. The first matching rule wins.
   *
   * @param {string} filePath
   * @returns {'backend'|'frontend'|'utils'|'types'|'tests'|'other'}
   */
  inferArea(filePath) {
    const p = `/${filePath.toLowerCase().replace(/\\/g, '/')}`;
    if (p.includes('/api/') || p.includes('/server/') || p.includes('/backend/')) {
      return 'backend';
    }
    if (p.includes('/components/') || p.includes('/pages/') || p.includes('/frontend/')) {
      return 'frontend';
    }
    if (p.includes('/utils/') || p.includes('/lib/') || p.includes('/helpers/')) {
      return 'utils';
    }
    if (p.includes('/types/') || p.endsWith('.d.ts')) {
      return 'types';
    }
    if (p.includes('/test/') || p.includes('/__tests__/')) {
      return 'tests';
    }
    return 'other';
  }

  /**
   * Writes `ai_index/manifest.json` under the root with the current index
   * statistics, creating the directory if needed.
   */
  async updateManifest() {
    const stats = await this.vectorStore.getStats();
    const codeGraph = this.analyzer.getCodeGraph();
    const manifest = {
      mode: 'smart',
      index: this.indexName,
      folder: this.rootPath,
      embed_model: this.config.EMBED_MODEL,
      last_built: new Date().toISOString(),
      stats: {
        total_files: this.fileIndex.size,
        total_symbols: this.symbolIndex.size,
        total_chunks: stats.documentCount,
        total_imports: codeGraph.imports.length,
        complexity_average: this.calculateAverageComplexity()
      },
      entry_points: this.entryPoints,
      watch_mode: this.watchMode
    };
    const manifestPath = path.join(this.rootPath, 'ai_index/manifest.json');
    await fs.mkdir(path.dirname(manifestPath), { recursive: true });
    await fs.writeFile(manifestPath, JSON.stringify(manifest, null, 2));
  }

  /**
   * Asks git for the untracked files it ignores under the root.
   *
   * @returns {Promise<string[]>} Trimmed, non-empty output lines; an empty
   *   array when either git command fails (e.g. not a repository, git missing).
   */
  async getGitIgnoredFiles() {
    try {
      // Check if this is a git repository
      execSync('git rev-parse --git-dir', {
        cwd: this.rootPath,
        stdio: 'pipe'
      });
      // Get list of ignored files
      const output = execSync('git ls-files --others --ignored --exclude-standard', {
        cwd: this.rootPath,
        encoding: 'utf-8',
        stdio: 'pipe'
      });
      return output
        .split('\n')
        .filter(line => line.trim())
        .map(file => file.trim());
    } catch (error) {
      // Best effort: treat any git failure as "nothing extra to ignore".
      return [];
    }
  }

  /**
   * Resolves a relative import to an existing file on disk.
   *
   * Candidates are tried in order: the path as written (only when the import
   * has an extension), then with `.js` / `.mjs` appended, then `index.js` /
   * `index.mjs` inside it. Only regular files count, so a directory whose
   * name contains a dot still falls through to its index file.
   *
   * @param {string} fromFile Path of the importing file.
   * @param {string} importPath Import specifier as written in the source.
   * @returns {Promise<string|null>} Absolute path, or null if nothing matched.
   */
  async resolveImport(fromFile, importPath) {
    const basePath = path.resolve(path.dirname(fromFile), importPath);

    const isFile = async (candidate) => {
      try {
        return (await fs.stat(candidate)).isFile();
      } catch {
        // Missing or unreadable candidate: simply not a match.
        return false;
      }
    };

    const candidates = [];
    if (path.extname(importPath)) {
      candidates.push(basePath);
    }
    candidates.push(
      `${basePath}.js`,
      `${basePath}.mjs`,
      path.join(basePath, 'index.js'),
      path.join(basePath, 'index.mjs')
    );

    for (const candidate of candidates) {
      if (await isFile(candidate)) {
        return candidate;
      }
    }
    return null; // Couldn't resolve import
  }

  /**
   * @param {string} filePath
   * @returns {boolean} True when the extension is `.js` or `.mjs` (case-insensitive).
   */
  isJavaScriptFile(filePath) {
    const ext = path.extname(filePath).toLowerCase();
    return ['.js', '.mjs'].includes(ext);
  }

  /**
   * Averages the complexity of indexed files, skipping files whose complexity
   * is missing or zero.
   *
   * @returns {number} Average rounded to two decimals, or 0 when no file counts.
   */
  calculateAverageComplexity() {
    let total = 0;
    let count = 0;
    for (const file of this.fileIndex.values()) {
      if (file.complexity) {
        total += file.complexity;
        count++;
      }
    }
    // Number(...) keeps the return type numeric; toFixed alone yields a string.
    return count > 0 ? Number((total / count).toFixed(2)) : 0;
  }

  /**
   * @returns {{files: number, symbols: number, imports: number, codeGraph: *}}
   *   Sizes of the in-memory maps plus the analyzer's code graph.
   */
  getIndexStats() {
    return {
      files: this.fileIndex.size,
      symbols: this.symbolIndex.size,
      imports: this.importGraph.size,
      codeGraph: this.analyzer.getCodeGraph()
    };
  }
}
// CLI support: run the indexer only when this module is the script Node was
// started with. File-system paths are compared rather than a hand-built
// `file://` string, which does not match `import.meta.url` for paths that
// need URL encoding (e.g. spaces) or on Windows.
if (process.argv[1] && path.resolve(process.argv[1]) === fileURLToPath(import.meta.url)) {
  const args = process.argv.slice(2);
  const options = {
    rootPath: process.cwd(),
    watch: false,
    entryPoints: []
  };

  for (let i = 0; i < args.length; i++) {
    switch (args[i]) {
      case '--watch':
      case '-w':
        options.watch = true;
        break;
      case '--entry':
      case '-e':
        if (args[i + 1]) {
          options.entryPoints.push(args[++i]);
        } else {
          // A dangling flag would otherwise silently index the whole tree.
          console.error(`Missing file argument for ${args[i]}`);
          process.exit(1);
        }
        break;
      case '--help':
        console.log(`
Smart Indexer - AI-powered code understanding
Usage:
smart-index [options] [folder]
Options:
--watch, -w Enable continuous file monitoring
--entry, -e <file> Specify entry point(s) for targeted indexing
--help Show this help message
Examples:
smart-index # Index current directory
smart-index --watch # Index with file monitoring
smart-index -e src/index.js # Index from entry point
smart-index -e src/app.js -e src/api.js --watch
`);
        process.exit(0);
        break;
      default:
        // The last non-flag argument is taken as the folder to index.
        if (!args[i].startsWith('-')) {
          options.rootPath = path.resolve(args[i]);
        }
    }
  }

  const indexer = new SmartIndexer(options);
  indexer.start().catch((error) => {
    console.error(error);
    // Report failure to the calling shell instead of exiting with 0.
    process.exitCode = 1;
  });
}