bigparse
Version:
MCP server that gives Claude instant, intelligent access to your codebase using the Language Server Protocol.
408 lines • 15.9 kB
JavaScript
;
// TypeScript interop helper: re-exposes property `key` of `source` on
// `target` under `alias` (defaults to `key`). Reuses an already-defined
// helper from `this` when one exists.
var __createBinding = (this && this.__createBinding) || (Object.create
    ? function (target, source, key, alias) {
        var exportName = alias === undefined ? key : alias;
        var descriptor = Object.getOwnPropertyDescriptor(source, key);
        var needsLiveGetter;
        if (!descriptor) {
            // Property is missing or inherited: read it through a getter.
            needsLiveGetter = true;
        }
        else if ("get" in descriptor) {
            // Accessor descriptors are reused only for real ES modules.
            needsLiveGetter = !source.__esModule;
        }
        else {
            // Mutable data properties need a getter so later writes stay visible.
            needsLiveGetter = descriptor.writable || descriptor.configurable;
        }
        if (needsLiveGetter) {
            descriptor = { enumerable: true, get: function () { return source[key]; } };
        }
        Object.defineProperty(target, exportName, descriptor);
    }
    : function (target, source, key, alias) {
        // Fallback for engines without Object.create: plain value copy.
        target[alias === undefined ? key : alias] = source[key];
    });
// TypeScript interop helper: stores `value` as the `default` export of the
// namespace object `target`. Reuses an already-defined helper from `this`
// when one exists.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create
    ? function (target, value) {
        var descriptor = { enumerable: true, value: value };
        Object.defineProperty(target, "default", descriptor);
    }
    : function (target, value) {
        // Fallback for engines without Object.create: plain assignment.
        target["default"] = value;
    });
// TypeScript interop helper backing `import * as ns from "mod"`.
// Real ES modules (flagged with `__esModule`) are returned untouched; any
// other value is wrapped in a fresh namespace object that re-exposes its own
// properties and carries the value itself as `default`.
var __importStar = (this && this.__importStar) || (function () {
    // Self-replacing function: the first call picks the key-listing
    // strategy, later calls go straight to it.
    var listOwnKeys = function (target) {
        listOwnKeys = Object.getOwnPropertyNames || function (obj) {
            var names = [];
            for (var name in obj) {
                if (Object.prototype.hasOwnProperty.call(obj, name)) {
                    names[names.length] = name;
                }
            }
            return names;
        };
        return listOwnKeys(target);
    };
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        var namespace = {};
        if (mod != null) {
            var keys = listOwnKeys(mod);
            for (var idx = 0; idx < keys.length; idx++) {
                // `default` is set separately below.
                if (keys[idx] !== "default") {
                    __createBinding(namespace, mod, keys[idx]);
                }
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
Object.defineProperty(exports, "__esModule", { value: true });
exports.CodeIndexer = void 0;
const glob_1 = require("glob");
const path = __importStar(require("path"));
const fs = __importStar(require("fs/promises"));
const crypto_1 = require("crypto");
const manager_js_1 = require("../lsp/manager.js");
const cache_js_1 = require("./cache.js");
const events_1 = require("events");
/**
 * Indexes source files (path, language, hash, LSP symbols), persists the
 * result as `index.json` under a per-workspace cache directory, and offers
 * regex search over the indexed files.
 *
 * Events emitted by {@link CodeIndexer#indexCodebase}: `indexing-start`,
 * `files-discovered`, `indexing-progress`, `indexing-complete`,
 * `indexing-error`.
 */
class CodeIndexer extends events_1.EventEmitter {
    /** Map of absolute file path -> indexed file record. */
    index = new Map();
    lspManager;
    /** Directory holding `index.json` and the file cache; set in the constructor. */
    indexPath = '.index';
    cache;
    /** Never assigned inside this class; closed by `shutdown()` when present. */
    fileWatcher;
    /** Guards against overlapping `indexCodebase` runs. */
    isIndexing = false;
    /** Number of files indexed concurrently per batch. */
    maxConcurrentIndexing = 4;
    /**
     * @param {string} [rootPath] Workspace root. It is passed to the LSP
     *   manager and used to derive the per-workspace cache directory name.
     */
    constructor(rootPath) {
        super();
        // Use a proper cache directory in user's home folder for MCP servers
        const homeDir = process.env.HOME || process.env.USERPROFILE || '';
        const cacheBaseDir = process.env.MCP_CACHE_DIR || path.join(homeDir, '.cache', 'bigparse');
        // Base64 of the root path, with the characters "/", "+" and "=" replaced by "_".
        const workspaceId = rootPath ? Buffer.from(rootPath).toString('base64').replace(/[/+=]/g, '_') : 'default';
        this.indexPath = path.join(cacheBaseDir, workspaceId);
        this.lspManager = new manager_js_1.LSPManager(rootPath);
        this.cache = new cache_js_1.FileCache(path.join(this.indexPath, 'cache'));
    }
    /**
     * Initializes the file cache and the LSP manager, then loads a previously
     * saved index if one exists.
     */
    async initialize() {
        await this.cache.initialize();
        await this.lspManager.initialize();
        // loadIndex() handles a missing or unreadable index itself.
        await this.loadIndex();
    }
    /**
     * Discovers and indexes every matching file under `basePath`.
     *
     * @param {string} basePath Directory to scan.
     * @param {string[]} [languages] Language ids to restrict the scan to; all
     *   known languages when omitted or empty.
     * @returns {Promise<{filesIndexed: number, errors: string[], duration: number, skipped: number}>}
     * @throws {Error} When another indexing run is already in progress.
     */
    async indexCodebase(basePath, languages) {
        if (this.isIndexing) {
            throw new Error('Indexing already in progress');
        }
        this.isIndexing = true;
        const startTime = Date.now();
        const errors = [];
        let filesIndexed = 0;
        let skipped = 0;
        try {
            await this.ensureIndexDirectory();
            this.emit('indexing-start', { basePath, languages });
            const patterns = this.getFilePatterns(languages);
            // A Set keeps each file once even when several patterns match it,
            // and avoids spreading a huge match list into push().
            const discovered = new Set();
            for (const pattern of patterns) {
                const matches = await (0, glob_1.glob)(pattern, {
                    cwd: basePath,
                    absolute: true,
                    ignore: ['**/node_modules/**', '**/dist/**', '**/.git/**', '**/.*/**'],
                });
                for (const match of matches) {
                    discovered.add(match);
                }
            }
            const files = [...discovered];
            this.emit('files-discovered', { total: files.length });
            // Process files in batches for better performance
            const batchSize = this.maxConcurrentIndexing;
            for (let i = 0; i < files.length; i += batchSize) {
                const batch = files.slice(i, i + batchSize);
                const results = await Promise.allSettled(batch.map(file => this.indexFile(file)));
                results.forEach((result, index) => {
                    if (result.status === 'fulfilled') {
                        if (result.value === 'cached' || result.value === 'skipped') {
                            skipped++;
                        }
                        else if (result.value === 'indexed') {
                            filesIndexed++;
                        }
                    }
                    else {
                        // Defensive only: indexFile() catches its own errors
                        // and reports them as 'skipped'.
                        errors.push(`Failed to index ${batch[index]}: ${result.reason}`);
                    }
                });
                this.emit('indexing-progress', {
                    processed: i + batch.length,
                    total: files.length,
                    filesIndexed,
                    skipped,
                    errors: errors.length,
                });
            }
            await this.saveIndex();
            this.emit('indexing-complete', {
                filesIndexed,
                skipped,
                errors: errors.length,
                duration: Date.now() - startTime,
            });
        }
        catch (error) {
            errors.push(`Indexing failed: ${error}`);
            this.emit('indexing-error', { error });
        }
        finally {
            this.isIndexing = false;
        }
        return {
            filesIndexed,
            errors,
            duration: Date.now() - startTime,
            skipped,
        };
    }
    /**
     * Runs a case-insensitive regular-expression search over the indexed files.
     *
     * @param {string} query Regular-expression source.
     * @param {{limit?: number, fileTypes?: string[]}} [options]
     *   `limit` caps the number of files returned (default 50). `fileTypes`
     *   restricts the search to file extensions, given with or without the
     *   leading dot.
     * @returns {Promise<Array<{file: string, matches: object[]}>>} One entry
     *   per file with at least one match. An invalid pattern yields `[]`.
     */
    async searchCode(query, options = {}) {
        const results = [];
        const limit = options.limit || 50;
        // path.extname() always includes the dot, so accept "ts" as ".ts".
        const fileTypeFilter = (options.fileTypes || []).map(type => typeof type === 'string' && type !== '' && !type.startsWith('.') ? `.${type}` : type);
        // Validate the pattern once instead of failing (and logging) per file.
        try {
            new RegExp(query, 'gi');
        }
        catch (error) {
            console.error(`Invalid search pattern ${JSON.stringify(query)}:`, error);
            return results;
        }
        let totalMatches = 0;
        // Process files in smaller batches to avoid memory issues
        const indexEntries = Array.from(this.index.entries());
        const batchSize = 5; // Process 5 files at a time
        for (let i = 0; i < indexEntries.length && results.length < limit; i += batchSize) {
            const batch = indexEntries.slice(i, i + batchSize);
            const batchResults = await Promise.all(batch.map(async ([filePath, indexedFile]) => {
                if (fileTypeFilter.length > 0) {
                    const ext = path.extname(filePath);
                    if (!fileTypeFilter.includes(ext)) {
                        return null;
                    }
                }
                return this.searchInFile(filePath, query, indexedFile);
            }));
            for (const fileResults of batchResults) {
                if (fileResults && fileResults.matches.length > 0) {
                    results.push(fileResults);
                    totalMatches += fileResults.matches.length;
                    if (results.length >= limit) {
                        break;
                    }
                }
            }
            // Force garbage collection hint
            if (global.gc) {
                global.gc();
            }
        }
        console.error(`Search complete: found ${totalMatches} matches in ${results.length} files`);
        return results;
    }
    /**
     * Indexes one file, reusing the cached record when the content hash is
     * unchanged.
     *
     * @param {string} filePath Absolute path of the file.
     * @returns {Promise<'indexed'|'cached'|'skipped'>} `'skipped'` for files
     *   above the size limit and for any failure (which is logged, not thrown).
     */
    async indexFile(filePath) {
        try {
            const stats = await fs.stat(filePath);
            // Skip very large files to prevent memory issues
            const MAX_INDEX_FILE_SIZE = 20 * 1024 * 1024; // 20MB limit for indexing
            if (stats.size > MAX_INDEX_FILE_SIZE) {
                console.error(`Skipping ${filePath} - file too large (${(stats.size / 1024 / 1024).toFixed(2)}MB)`);
                return 'skipped';
            }
            const content = await fs.readFile(filePath, 'utf-8');
            const hash = (0, crypto_1.createHash)('sha256').update(content).digest('hex');
            // Check cache first
            const cacheKey = filePath;
            const cachedData = await this.cache.get(cacheKey, hash);
            if (cachedData) {
                this.index.set(filePath, cachedData);
                return 'cached';
            }
            const language = this.detectLanguage(filePath);
            const symbols = await this.extractSymbols(filePath, language);
            const indexedFile = {
                path: filePath,
                language,
                size: stats.size,
                hash,
                lastModified: stats.mtime,
                symbols,
            };
            this.index.set(filePath, indexedFile);
            await this.cache.set(cacheKey, indexedFile, hash);
            return 'indexed';
        }
        catch (error) {
            console.error(`Failed to index ${filePath}:`, error);
            return 'skipped';
        }
    }
    /**
     * Asks the LSP manager for the document symbols of a file and converts
     * them to flat index records.
     *
     * Both LSP result shapes are accepted: `SymbolInformation` (range under
     * `location.range`) and hierarchical `DocumentSymbol` (own `range` plus
     * nested `children`, which are flattened depth-first with the parent's
     * name as `containerName`).
     *
     * @param {string} filePath Absolute path of the file.
     * @param {string} _language Unused.
     * @returns {Promise<object[]>} Symbol records; `[]` when the lookup fails.
     */
    async extractSymbols(filePath, _language) {
        try {
            const lspSymbols = await this.lspManager.getDocumentSymbols(filePath);
            const symbols = [];
            const visit = (symbol, parentName) => {
                const range = symbol.location?.range ?? symbol.range;
                if (range) {
                    symbols.push({
                        name: symbol.name,
                        kind: this.symbolKindToString(symbol.kind),
                        location: {
                            start: {
                                line: range.start.line,
                                character: range.start.character,
                            },
                            end: {
                                line: range.end.line,
                                character: range.end.character,
                            },
                        },
                        containerName: symbol.containerName ?? parentName,
                    });
                }
                else {
                    console.warn(`Symbol ${symbol.name} has no range information`);
                }
                if (Array.isArray(symbol.children)) {
                    for (const child of symbol.children) {
                        visit(child, symbol.name);
                    }
                }
            };
            // The LSP spec allows a null result for "no symbols".
            for (const symbol of lspSymbols ?? []) {
                visit(symbol, undefined);
            }
            return symbols;
        }
        catch (error) {
            // Language server might not be available, continue without symbols
            console.error(`Failed to extract symbols from ${filePath}:`, error);
            return [];
        }
    }
    /**
     * Searches one file line by line with a case-insensitive regex.
     *
     * At most 100 matches are collected per file. Zero-length matches (for
     * example from `a*`) are not reported.
     *
     * @param {string} filePath Absolute path of the file.
     * @param {string} query Regular-expression source.
     * @param {{symbols?: object[]}} indexedFile Index record for the file.
     * @returns {Promise<{file: string, matches: object[]}>} Matches with
     *   1-based `line`/`column`, the trimmed line text (max 200 chars) and the
     *   enclosing symbol if any. Failures are logged and yield the matches
     *   collected so far.
     */
    async searchInFile(filePath, query, indexedFile) {
        const matches = [];
        const MAX_MATCHES_PER_FILE = 100;
        try {
            // Skip very large files to prevent memory issues
            const stats = await fs.stat(filePath);
            const MAX_FILE_SIZE = 10 * 1024 * 1024; // 10MB limit for search
            if (stats.size > MAX_FILE_SIZE) {
                console.error(`Skipping search in ${filePath} - file too large (${(stats.size / 1024 / 1024).toFixed(2)}MB)`);
                return { file: filePath, matches };
            }
            const content = await fs.readFile(filePath, 'utf-8');
            // Split on LF or CRLF so "$" anchors work on Windows line endings.
            const lines = content.split(/\r?\n/);
            const regex = new RegExp(query, 'gi');
            const symbols = indexedFile?.symbols ?? [];
            for (let lineIndex = 0; lineIndex < lines.length && matches.length < MAX_MATCHES_PER_FILE; lineIndex++) {
                const line = lines[lineIndex];
                // A global regex keeps its position between exec() calls;
                // start every line from the beginning.
                regex.lastIndex = 0;
                let match;
                while (matches.length < MAX_MATCHES_PER_FILE && (match = regex.exec(line)) !== null) {
                    if (match[0] === '') {
                        // An empty match does not advance lastIndex; step
                        // forward manually or the loop never terminates.
                        regex.lastIndex += 1;
                        continue;
                    }
                    const symbol = this.findSymbolAtLocation(symbols, lineIndex, match.index);
                    matches.push({
                        line: lineIndex + 1,
                        column: match.index + 1,
                        text: line.trim().substring(0, 200), // Limit text length
                        symbol,
                    });
                }
            }
        }
        catch (error) {
            console.error(`Failed to search in ${filePath}:`, error);
        }
        return { file: filePath, matches };
    }
    /**
     * Returns the first symbol whose range contains the given position.
     *
     * @param {object[]} symbols Symbol records with 0-based `location` ranges.
     * @param {number} line 0-based line.
     * @param {number} column 0-based column.
     * @returns {object|undefined}
     */
    findSymbolAtLocation(symbols, line, column) {
        return symbols.find(symbol => {
            return line >= symbol.location.start.line &&
                line <= symbol.location.end.line &&
                (line !== symbol.location.start.line || column >= symbol.location.start.character) &&
                (line !== symbol.location.end.line || column <= symbol.location.end.character);
        });
    }
    /**
     * Maps a file extension (case-insensitively) to a language id.
     *
     * @param {string} filePath
     * @returns {string} Language id, or `'plaintext'` for unknown extensions.
     */
    detectLanguage(filePath) {
        // Lower-cased so that e.g. ".R" (matched by getFilePatterns) resolves.
        const ext = path.extname(filePath).toLowerCase();
        const languageMap = {
            '.ts': 'typescript',
            '.tsx': 'typescript',
            '.js': 'javascript',
            '.jsx': 'javascript',
            '.py': 'python',
            '.rs': 'rust',
            '.go': 'go',
            '.java': 'java',
            '.cpp': 'cpp',
            '.cc': 'cpp',
            '.cxx': 'cpp',
            '.hpp': 'cpp',
            '.c': 'c',
            // Headers are shared by C, C++ and Objective-C; labelled as C.
            '.h': 'c',
            '.cs': 'csharp',
            '.rb': 'ruby',
            '.php': 'php',
            '.swift': 'swift',
            '.kt': 'kotlin',
            '.scala': 'scala',
            '.r': 'r',
            '.m': 'objc',
            '.mm': 'objcpp',
        };
        return languageMap[ext] || 'plaintext';
    }
    /**
     * Returns the glob patterns for the requested languages, without
     * duplicates.
     *
     * @param {string[]} [languages] Language ids; unknown ids are ignored.
     *   When omitted or empty, the patterns of every known language are used.
     * @returns {string[]}
     */
    getFilePatterns(languages) {
        const languagePatterns = {
            typescript: ['**/*.ts', '**/*.tsx'],
            javascript: ['**/*.js', '**/*.jsx'],
            python: ['**/*.py'],
            rust: ['**/*.rs'],
            go: ['**/*.go'],
            java: ['**/*.java'],
            cpp: ['**/*.cpp', '**/*.cc', '**/*.cxx', '**/*.hpp', '**/*.h'],
            c: ['**/*.c', '**/*.h'],
            csharp: ['**/*.cs'],
            ruby: ['**/*.rb'],
            php: ['**/*.php'],
            swift: ['**/*.swift'],
            kotlin: ['**/*.kt'],
            scala: ['**/*.scala'],
            r: ['**/*.r', '**/*.R'],
            objc: ['**/*.m', '**/*.h'],
            objcpp: ['**/*.mm'],
        };
        let patterns;
        if (languages && languages.length > 0) {
            patterns = [];
            for (const lang of languages) {
                // Own-property check: ids like "constructor" must not resolve
                // to members inherited from Object.prototype.
                if (Object.prototype.hasOwnProperty.call(languagePatterns, lang)) {
                    patterns.push(...languagePatterns[lang]);
                }
            }
        }
        else {
            patterns = Object.values(languagePatterns).flat();
        }
        // Several languages share "**/*.h"; glob each pattern only once.
        return [...new Set(patterns)];
    }
    /**
     * Converts a 1-based LSP `SymbolKind` number to its name.
     *
     * @param {number} kind
     * @returns {string} Kind name, or `'Unknown'` when out of range.
     */
    symbolKindToString(kind) {
        const kinds = [
            'File', 'Module', 'Namespace', 'Package', 'Class', 'Method',
            'Property', 'Field', 'Constructor', 'Enum', 'Interface',
            'Function', 'Variable', 'Constant', 'String', 'Number',
            'Boolean', 'Array', 'Object', 'Key', 'Null', 'EnumMember',
            'Struct', 'Event', 'Operator', 'TypeParameter'
        ];
        return kinds[kind - 1] || 'Unknown';
    }
    /**
     * Creates the index directory if needed. Best effort: a failure is logged
     * but not thrown, so indexing can still proceed in memory.
     */
    async ensureIndexDirectory() {
        try {
            await fs.mkdir(this.indexPath, { recursive: true });
        }
        catch (error) {
            // With `recursive: true` an existing directory is not an error,
            // so anything reaching this point is a real problem worth surfacing.
            console.error(`Failed to create index directory ${this.indexPath}:`, error);
        }
    }
    /**
     * Writes the in-memory index to `index.json` in the index directory,
     * creating the directory first if it does not exist.
     *
     * @throws When the directory cannot be created or the file cannot be written.
     */
    async saveIndex() {
        const indexData = Array.from(this.index.entries());
        await fs.mkdir(this.indexPath, { recursive: true });
        await fs.writeFile(path.join(this.indexPath, 'index.json'), JSON.stringify(indexData, null, 2));
    }
    /**
     * Replaces the in-memory index with the contents of `index.json`.
     * A missing file is silently ignored; any other failure (unreadable file,
     * corrupt JSON) is logged and leaves the current index untouched.
     */
    async loadIndex() {
        const indexFile = path.join(this.indexPath, 'index.json');
        try {
            const data = await fs.readFile(indexFile, 'utf-8');
            const indexData = JSON.parse(data);
            this.index = new Map(indexData);
        }
        catch (error) {
            // Index doesn't exist yet
            if (error?.code !== 'ENOENT') {
                console.error(`Failed to load index from ${indexFile}:`, error);
            }
        }
    }
    /**
     * Saves the index, shuts the LSP manager down and closes the file watcher.
     * The LSP manager and watcher are released even when saving fails; the
     * save error is rethrown afterwards.
     */
    async shutdown() {
        this.isIndexing = false;
        try {
            await this.saveIndex();
        }
        finally {
            try {
                await this.lspManager.shutdown();
            }
            finally {
                if (this.fileWatcher) {
                    this.fileWatcher.close();
                }
            }
        }
    }
    /** Returns the statistics reported by the file cache. */
    async getCacheStats() {
        return this.cache.getStats();
    }
    /** Clears the file cache and the in-memory index. */
    async clearCache() {
        await this.cache.clear();
        this.index.clear();
    }
    /**
     * Summarizes the in-memory index.
     *
     * @returns {{totalFiles: number, languages: Object<string, number>, totalSymbols: number}}
     */
    getIndexStats() {
        const stats = {
            totalFiles: this.index.size,
            languages: {},
            totalSymbols: 0,
        };
        for (const file of this.index.values()) {
            stats.languages[file.language] = (stats.languages[file.language] || 0) + 1;
            // Tolerate records that carry no symbol list.
            stats.totalSymbols += file.symbols?.length ?? 0;
        }
        return stats;
    }
}
exports.CodeIndexer = CodeIndexer;
//# sourceMappingURL=indexer.js.map