ultimate-mcp-server
Version:
The definitive all-in-one Model Context Protocol server for AI-assisted coding across 30+ platforms
395 lines • 13.7 kB
JavaScript
/**
* File Collector for Large Context Analysis
* Recursively collects files matching patterns for analysis
*/
import * as fs from 'fs/promises';
import * as path from 'path';
import { createHash } from 'crypto';
/**
 * Recursively collects text files below a root directory, filters them by an
 * include regex and a list of exclude patterns, and renders the result either
 * as a directory tree or as a single context string.
 */
export class FileCollector {
    /**
     * Lower-cased file extension (with leading dot) → language identifier.
     * Built once for the class instead of on every `detectLanguage` call.
     */
    static LANGUAGE_BY_EXTENSION = Object.freeze({
        '.js': 'javascript',
        '.jsx': 'javascript',
        '.ts': 'typescript',
        '.tsx': 'typescript',
        '.py': 'python',
        '.java': 'java',
        '.cpp': 'cpp',
        '.c': 'c',
        '.cs': 'csharp',
        '.go': 'go',
        '.rs': 'rust',
        '.php': 'php',
        '.rb': 'ruby',
        '.swift': 'swift',
        '.kt': 'kotlin',
        '.scala': 'scala',
        '.r': 'r',
        '.m': 'matlab',
        '.lua': 'lua',
        '.pl': 'perl',
        '.sh': 'bash',
        '.sql': 'sql',
        '.html': 'html',
        '.css': 'css',
        '.scss': 'scss',
        '.sass': 'sass',
        '.less': 'less',
        '.xml': 'xml',
        '.json': 'json',
        '.yaml': 'yaml',
        '.yml': 'yaml',
        '.md': 'markdown',
        '.tex': 'latex',
        '.vue': 'vue',
        '.svelte': 'svelte'
    });

    /** Glob-style patterns that are always excluded, in addition to `options.exclude`. */
    defaultExcludes = [
        'node_modules',
        '.git',
        '.svn',
        '.hg',
        'dist',
        'build',
        'coverage',
        '.next',
        '.nuxt',
        '.cache',
        '__pycache__',
        '*.pyc',
        '*.pyo',
        '.DS_Store',
        'Thumbs.db',
        '*.log',
        '*.lock',
        'package-lock.json',
        'yarn.lock',
        'pnpm-lock.yaml'
    ];

    /**
     * Collect files matching the specified pattern.
     *
     * @param {object} options
     * @param {string} options.rootDir - Directory to start from.
     * @param {string} options.pattern - Regex source tested against each file's full path.
     * @param {string[]} [options.exclude] - Extra exclude patterns (glob-like or `/regex/`).
     * @param {string} [options.sortBy] - 'path' | 'size' | 'modified' | 'name'.
     * @param {number} [options.maxDepth] - Maximum directory depth; -1 or undefined means unlimited.
     * @param {number} [options.maxFileSize] - Size limit in megabytes; larger files are skipped.
     * @param {boolean} [options.includeHidden] - Include entries whose name starts with '.'.
     * @param {boolean} [options.includeMetadata] - Attach lines/tokens/hash to each file.
     * @returns {Promise<object>} The collection: files plus totals and summary metadata.
     */
    async collect(options) {
        const files = [];
        const visitedDirs = new Set();
        const regex = new RegExp(options.pattern);
        const excludePatterns = [
            ...this.defaultExcludes,
            ...(options.exclude || [])
        ].map(pattern => this.compilePattern(pattern));

        await this.collectRecursive(options.rootDir, options.rootDir, regex, excludePatterns, files, visitedDirs, options, 0);

        if (options.sortBy) {
            this.sortFiles(files, options.sortBy);
        }

        return {
            rootDir: options.rootDir,
            pattern: options.pattern,
            files,
            totalSize: files.reduce((sum, f) => sum + f.size, 0),
            totalFiles: files.length,
            // Files collected without metadata contribute 0 tokens.
            totalTokens: files.reduce((sum, f) => sum + (f.metadata?.tokens || 0), 0),
            collectedAt: new Date(),
            metadata: this.calculateMetadata(files)
        };
    }

    /**
     * Generate directory tree structure.
     *
     * @param {string} rootDir - Directory to describe; must be a directory.
     * @param {object} [options]
     * @param {number} [options.maxDepth=5] - Levels to descend; 0 returns the root node only.
     * @param {boolean} [options.includeFiles=true] - Whether file nodes are listed.
     * @param {string} [options.pattern] - Regex source; when set, only matching files are listed.
     * @returns {Promise<object>} Root tree node.
     * @throws {Error} If `rootDir` is not a directory (or cannot be stat'ed).
     */
    async generateDirectoryTree(rootDir, options) {
        const stats = await fs.stat(rootDir);
        if (!stats.isDirectory()) {
            throw new Error('Root path must be a directory');
        }
        // `??` rather than `||` so that an explicit maxDepth of 0 is honoured.
        const maxDepth = options?.maxDepth ?? 5;
        const includeFiles = options?.includeFiles ?? true;
        const pattern = options?.pattern ? new RegExp(options.pattern) : undefined;
        return this.buildDirectoryTree(rootDir, path.basename(rootDir), maxDepth, 0, includeFiles, pattern);
    }

    /**
     * Collect files recursively, appending matches to `files`.
     *
     * Directory read failures are logged and skipped so one unreadable
     * directory does not abort the whole collection.
     */
    async collectRecursive(currentPath, rootDir, pattern, excludePatterns, files, visitedDirs, options, depth) {
        if (options.maxDepth !== undefined && options.maxDepth !== -1 && depth > options.maxDepth) {
            return;
        }
        // Track resolved paths so the same real directory is never walked twice.
        const realPath = await fs.realpath(currentPath);
        if (visitedDirs.has(realPath)) {
            return;
        }
        visitedDirs.add(realPath);
        try {
            const entries = await fs.readdir(currentPath, { withFileTypes: true });
            for (const entry of entries) {
                const fullPath = path.join(currentPath, entry.name);
                const relativePath = path.relative(rootDir, fullPath);
                if (this.shouldExclude(relativePath, excludePatterns)) {
                    continue;
                }
                if (!options.includeHidden && entry.name.startsWith('.')) {
                    continue;
                }
                if (entry.isDirectory()) {
                    await this.collectRecursive(fullPath, rootDir, pattern, excludePatterns, files, visitedDirs, options, depth + 1);
                }
                else if (entry.isFile() && pattern.test(fullPath)) {
                    const file = await this.collectFile(fullPath, relativePath, options);
                    if (file) {
                        files.push(file);
                    }
                }
            }
        }
        catch (error) {
            console.error(`Error reading directory ${currentPath}:`, error);
        }
    }

    /**
     * Collect a single file.
     *
     * @returns {Promise<object|null>} The file record, or null when the file
     *   exceeds `options.maxFileSize` (MB) or cannot be read.
     */
    async collectFile(fullPath, relativePath, options) {
        try {
            const stats = await fs.stat(fullPath);
            const sizeMB = stats.size / (1024 * 1024);
            if (options.maxFileSize && sizeMB > options.maxFileSize) {
                return null;
            }
            const content = await fs.readFile(fullPath, 'utf-8');
            return {
                path: fullPath,
                relativePath,
                content,
                size: stats.size,
                modified: stats.mtime,
                language: this.detectLanguage(fullPath),
                encoding: 'utf-8',
                // Line count, token estimate and hash are only computed on request.
                metadata: options.includeMetadata ? {
                    lines: content.split('\n').length,
                    tokens: this.estimateTokens(content),
                    hash: createHash('md5').update(content).digest('hex')
                } : undefined
            };
        }
        catch (error) {
            console.error(`Error reading file ${fullPath}:`, error);
            return null;
        }
    }

    /**
     * Build directory tree recursively.
     *
     * Entries whose name starts with '.' are always skipped. Children are
     * sorted directories-first, then by name.
     */
    async buildDirectoryTree(currentPath, name, maxDepth, currentDepth, includeFiles, pattern) {
        const stats = await fs.stat(currentPath);
        if (!stats.isDirectory()) {
            return {
                name,
                path: currentPath,
                type: 'file',
                size: stats.size,
                extension: path.extname(name),
                language: this.detectLanguage(currentPath)
            };
        }
        const tree = {
            name,
            path: currentPath,
            type: 'directory',
            children: []
        };
        if (currentDepth >= maxDepth) {
            return tree;
        }
        let entries;
        try {
            entries = await fs.readdir(currentPath, { withFileTypes: true });
        }
        catch (error) {
            console.error(`Error reading directory ${currentPath}:`, error);
            return tree;
        }
        for (const entry of entries) {
            if (entry.name.startsWith('.')) {
                continue;
            }
            const fullPath = path.join(currentPath, entry.name);
            // Errors are isolated per entry: one entry that cannot be stat'ed
            // (e.g. a dangling symlink) must not drop its siblings or the sort.
            try {
                if (entry.isDirectory()) {
                    tree.children.push(await this.buildDirectoryTree(fullPath, entry.name, maxDepth, currentDepth + 1, includeFiles, pattern));
                }
                else if (includeFiles && (!pattern || pattern.test(fullPath))) {
                    const entryStats = await fs.stat(fullPath);
                    tree.children.push({
                        name: entry.name,
                        path: fullPath,
                        type: 'file',
                        size: entryStats.size,
                        extension: path.extname(entry.name),
                        language: this.detectLanguage(fullPath)
                    });
                }
            }
            catch (error) {
                console.error(`Error reading ${fullPath}:`, error);
            }
        }
        tree.children.sort((a, b) => {
            if (a.type !== b.type) {
                return a.type === 'directory' ? -1 : 1;
            }
            return a.name.localeCompare(b.name);
        });
        return tree;
    }

    /**
     * Compile pattern to regex.
     *
     * A pattern written as `/body/` (non-empty body) is used as a regex
     * verbatim. Anything else is treated as a glob where `*` matches any run
     * of characters and `?` a single character, anchored to a whole path
     * segment.
     *
     * @param {string} pattern
     * @returns {RegExp}
     */
    compilePattern(pattern) {
        // Require a non-empty body: '/' or '//' would otherwise compile to an
        // empty regex that matches, and therefore excludes, every path.
        if (pattern.length > 2 && pattern.startsWith('/') && pattern.endsWith('/')) {
            return new RegExp(pattern.slice(1, -1));
        }
        const escaped = pattern
            .replace(/[.+^${}()|[\]\\]/g, '\\$&')
            .replace(/\*/g, '.*')
            .replace(/\?/g, '.');
        return new RegExp(`(^|/)${escaped}($|/)`);
    }

    /**
     * Check if path should be excluded.
     *
     * @param {string} path - Path relative to the collection root (note: this
     *   parameter shadows the `path` module inside the method).
     * @param {RegExp[]} excludePatterns - Patterns from `compilePattern`.
     * @returns {boolean}
     */
    shouldExclude(path, excludePatterns) {
        // Compiled globs anchor on '/', but Windows relative paths use '\'.
        // Test the native form first (keeps existing matches) and then a
        // '/'-normalised form so nested excludes also match on Windows.
        const normalized = process.platform === 'win32' ? path.replace(/\\/g, '/') : path;
        return excludePatterns.some(pattern => pattern.test(path) || (normalized !== path && pattern.test(normalized)));
    }

    /**
     * Detect programming language from file extension.
     *
     * @param {string} filePath
     * @returns {string|undefined} Language identifier, or undefined for unknown extensions.
     */
    detectLanguage(filePath) {
        const ext = path.extname(filePath).toLowerCase();
        return FileCollector.LANGUAGE_BY_EXTENSION[ext];
    }

    /**
     * Estimate token count (rough approximation: ~4 characters per token).
     *
     * @param {string} content
     * @returns {number}
     */
    estimateTokens(content) {
        return Math.ceil(content.length / 4);
    }

    /**
     * Sort files in place by specified criteria.
     *
     * 'path' and 'name' sort ascending; 'size' sorts largest first and
     * 'modified' newest first. Unknown criteria leave the order unchanged.
     */
    sortFiles(files, sortBy) {
        files.sort((a, b) => {
            switch (sortBy) {
                case 'path':
                    return a.relativePath.localeCompare(b.relativePath);
                case 'size':
                    return b.size - a.size;
                case 'modified':
                    return b.modified.getTime() - a.modified.getTime();
                case 'name':
                    return path.basename(a.path).localeCompare(path.basename(b.path));
                default:
                    return 0;
            }
        });
    }

    /**
     * Calculate collection metadata: per-language file counts, the sorted set
     * of containing directories, and the relative paths of the largest,
     * oldest and newest files (undefined for an empty list).
     */
    calculateMetadata(files) {
        const languages = {};
        const directories = new Set();
        let largestFile;
        let oldestFile;
        let newestFile;
        for (const file of files) {
            if (file.language) {
                languages[file.language] = (languages[file.language] || 0) + 1;
            }
            directories.add(path.dirname(file.relativePath));
            if (!largestFile || file.size > largestFile.size) {
                largestFile = file;
            }
            if (!oldestFile || file.modified < oldestFile.modified) {
                oldestFile = file;
            }
            if (!newestFile || file.modified > newestFile.modified) {
                newestFile = file;
            }
        }
        return {
            languages,
            directories: Array.from(directories).sort(),
            largestFile: largestFile?.relativePath,
            oldestFile: oldestFile?.relativePath,
            newestFile: newestFile?.relativePath
        };
    }

    /**
     * Format collection as context.
     *
     * @param {object} collection - Result of `collect`.
     * @param {string} [format='structured'] - 'plain' | 'xml' | 'json'; anything else is structured.
     * @returns {string}
     */
    formatAsContext(collection, format = 'structured') {
        switch (format) {
            case 'plain':
                return this.formatPlain(collection);
            case 'xml':
                return this.formatXML(collection);
            case 'json':
                return JSON.stringify(collection, null, 2);
            default:
                return this.formatStructured(collection);
        }
    }

    /** Render the collection as plain text with a banner before each file. */
    formatPlain(collection) {
        const rule = '='.repeat(80);
        let context = `# File Collection from ${collection.rootDir}\n\n`;
        context += `Pattern: ${collection.pattern}\n`;
        context += `Total Files: ${collection.totalFiles}\n`;
        context += `Total Size: ${(collection.totalSize / 1024 / 1024).toFixed(2)} MB\n\n`;
        for (const file of collection.files) {
            context += `\n${rule}\n`;
            context += `File: ${file.relativePath}\n`;
            context += `${rule}\n\n`;
            context += file.content;
            context += '\n';
        }
        return context;
    }

    /**
     * Render the collection as XML-like markup. Attribute values and file
     * contents are escaped so that quotes, angle brackets or ampersands in a
     * path, pattern or file body cannot break the markup.
     */
    formatStructured(collection) {
        const attr = (value) => this.escapeXML(String(value));
        let context = `<file_collection root="${attr(collection.rootDir)}" pattern="${attr(collection.pattern)}">\n`;
        context += `<summary files="${collection.totalFiles}" size="${collection.totalSize}" />\n\n`;
        for (const file of collection.files) {
            context += `<file path="${attr(file.relativePath)}" size="${file.size}" language="${attr(file.language || 'unknown')}">\n`;
            context += `<content>\n${this.escapeXML(file.content)}\n</content>\n`;
            context += `</file>\n\n`;
        }
        context += `</file_collection>`;
        return context;
    }

    /** XML output currently shares the structured format. */
    formatXML(collection) {
        return this.formatStructured(collection);
    }

    /**
     * Replace the five XML-reserved characters with their predefined entities.
     * `&` is handled first so the ampersands of the other entities are not
     * escaped a second time.
     *
     * @param {string} text
     * @returns {string}
     */
    escapeXML(text) {
        return text
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;')
            .replace(/"/g, '&quot;')
            .replace(/'/g, '&apos;');
    }
}
//# sourceMappingURL=file-collector.js.map