UNPKG

zrald1

Version:

Advanced Graph RAG MCP Server with file location identification, graph processing, and result summarization capabilities

254 lines 10 kB
import * as fs from 'fs/promises';
import * as path from 'path';
import { v4 as uuidv4 } from 'uuid';

/**
 * Finds files on disk, reads their metadata (and optionally their text
 * content), and converts the results into node records, text chunks and a
 * summary object.
 */
export class FileProcessor {
  /** Effective options: `defaultOptions` overridden by the constructor argument. */
  options;

  /** Baseline option values used for every key the caller does not supply. */
  defaultOptions = {
    recursive: true,
    maxFileSize: 10 * 1024 * 1024, // 10MB
    allowedExtensions: ['.txt', '.md', '.js', '.ts', '.py', '.json', '.xml', '.html', '.css'],
    excludePatterns: ['node_modules', '.git', 'dist', 'build', '.env'],
    includeContent: true,
  };

  /**
   * @param {object} [options] - Partial overrides for `defaultOptions`
   *   (`recursive`, `maxFileSize`, `allowedExtensions`, `excludePatterns`,
   *   `includeContent`). The merge is shallow: a supplied array replaces the
   *   default array rather than extending it.
   */
  constructor(options = {}) {
    this.options = { ...this.defaultOptions, ...options };
  }

  /**
   * Searches every path in `searchPaths` for files whose name (or, when
   * `options.includeContent` is set, whose text content) contains `query`.
   *
   * A failure in one search path is logged with `console.warn` and does not
   * abort the remaining paths.
   *
   * @param {object} searchOptions
   * @param {string} searchOptions.query - Substring to look for.
   * @param {string[]} searchOptions.searchPaths - Directories to search.
   * @param {string[]} [searchOptions.fileTypes] - If non-empty, only files whose
   *   `path.extname` value is in this list are considered.
   * @param {boolean} [searchOptions.recursive=true] - Descend into subdirectories.
   * @param {number} [searchOptions.maxResults=50] - Upper bound on returned files.
   * @param {boolean} [searchOptions.caseSensitive=false] - Match case exactly.
   * @returns {Promise<object[]>} At most `maxResults` file records (see `processFile`).
   */
  async identifyFiles(searchOptions) {
    const files = [];
    const {
      query,
      searchPaths,
      fileTypes,
      recursive = true,
      maxResults = 50,
      caseSensitive = false,
    } = searchOptions;

    for (const searchPath of searchPaths) {
      try {
        const foundFiles = await this.searchInDirectory(searchPath, query, fileTypes, recursive, caseSensitive);
        files.push(...foundFiles);
        // Stop visiting further search paths once enough results were collected.
        if (files.length >= maxResults) {
          break;
        }
      } catch (error) {
        console.warn(`Error searching in ${searchPath}:`, error instanceof Error ? error.message : String(error));
      }
    }

    return files.slice(0, maxResults);
  }

  /**
   * Walks one directory and collects records for the files that match `query`
   * by name or, failing that, by content.
   *
   * Errors while reading the directory are logged and result in whatever was
   * collected so far being returned.
   *
   * @param {string} dirPath - Directory to read.
   * @param {string} query - Substring to look for.
   * @param {string[]} [fileTypes] - Extension allow-list; entries are compared
   *   verbatim against `path.extname`, so they need the leading dot.
   * @param {boolean} [recursive=true] - Descend into subdirectories.
   * @param {boolean} [caseSensitive=false] - Match case exactly.
   * @returns {Promise<object[]>} Matching file records.
   */
  async searchInDirectory(dirPath, query, fileTypes, recursive = true, caseSensitive = false) {
    const files = [];
    // Normalised once here; the same value is reused for name and content matching.
    const queryLower = caseSensitive ? query : query.toLowerCase();

    try {
      const entries = await fs.readdir(dirPath, { withFileTypes: true });

      for (const entry of entries) {
        const fullPath = path.join(dirPath, entry.name);

        // Skip excluded patterns
        if (this.shouldExclude(entry.name)) {
          continue;
        }

        if (entry.isDirectory() && recursive) {
          const subFiles = await this.searchInDirectory(fullPath, query, fileTypes, recursive, caseSensitive);
          files.push(...subFiles);
        } else if (entry.isFile()) {
          const fileName = caseSensitive ? entry.name : entry.name.toLowerCase();
          const extension = path.extname(entry.name);

          // Check file type filter
          if (fileTypes && fileTypes.length > 0) {
            if (!fileTypes.includes(extension)) {
              continue;
            }
          }

          // Check if file matches query
          if (fileName.includes(queryLower)) {
            const file = await this.processFile(fullPath);
            if (file) {
              files.push(file);
            }
          } else if (this.options.includeContent) {
            // Search in file content
            const hasContentMatch = await this.searchInFileContent(fullPath, queryLower, caseSensitive);
            if (hasContentMatch) {
              const file = await this.processFile(fullPath);
              if (file) {
                files.push(file);
              }
            }
          }
        }
      }
    } catch (error) {
      console.warn(`Error reading directory ${dirPath}:`, error instanceof Error ? error.message : String(error));
    }

    return files;
  }

  /**
   * Reports whether the text of `filePath` contains `query`.
   *
   * Only the file content is lower-cased here when `caseSensitive` is false, so
   * the caller must pass an already lower-cased `query` in that mode.
   *
   * @param {string} filePath - File to inspect.
   * @param {string} query - Substring to look for.
   * @param {boolean} caseSensitive - Compare without lower-casing the content.
   * @returns {Promise<boolean>} `false` for files larger than
   *   `options.maxFileSize`, files whose extension is not in
   *   `options.allowedExtensions`, and files that cannot be read.
   */
  async searchInFileContent(filePath, query, caseSensitive) {
    try {
      const stats = await fs.stat(filePath);

      // Skip large files
      if (stats.size > this.options.maxFileSize) {
        return false;
      }

      const extension = path.extname(filePath);

      // Only search in allowed text files
      if (!this.options.allowedExtensions.includes(extension)) {
        return false;
      }

      const content = await fs.readFile(filePath, 'utf-8');
      const searchContent = caseSensitive ? content : content.toLowerCase();
      return searchContent.includes(query);
    } catch (error) {
      // File might be binary or inaccessible
      return false;
    }
  }

  /**
   * Builds a file record for `filePath`.
   *
   * The record has `id` (fresh UUID), `path`, `name` (base name without
   * extension), `extension`, `size`, `metadata` (`created`, `modified`,
   * `accessed`, `directory`, `fullName`), `created_at`, `updated_at` and, when
   * `options.includeContent` is set and the extension is allowed, `content`.
   *
   * @param {string} filePath - File to describe.
   * @returns {Promise<object|null>} The record, or `null` when the file exceeds
   *   `options.maxFileSize` or cannot be stat'ed. A failed content read is
   *   logged and leaves `content` unset without discarding the record.
   */
  async processFile(filePath) {
    try {
      const stats = await fs.stat(filePath);
      const parsedPath = path.parse(filePath);

      // Skip large files
      if (stats.size > this.options.maxFileSize) {
        console.warn(`File ${filePath} is too large (${stats.size} bytes), skipping`);
        return null;
      }

      const file = {
        id: uuidv4(),
        path: filePath,
        name: parsedPath.name,
        extension: parsedPath.ext,
        size: stats.size,
        metadata: {
          created: stats.birthtime,
          modified: stats.mtime,
          accessed: stats.atime,
          directory: parsedPath.dir,
          fullName: parsedPath.base,
        },
        created_at: new Date(),
        updated_at: new Date(),
      };

      // Include content if requested and file is text-based
      if (this.options.includeContent && this.isTextFile(parsedPath.ext)) {
        try {
          file.content = await fs.readFile(filePath, 'utf-8');
        } catch (error) {
          console.warn(`Could not read content of ${filePath}:`, error instanceof Error ? error.message : String(error));
        }
      }

      return file;
    } catch (error) {
      console.warn(`Error processing file ${filePath}:`, error instanceof Error ? error.message : String(error));
      return null;
    }
  }

  /**
   * Builds file records for every non-excluded file under `dirPath`,
   * descending into subdirectories when `options.recursive` is set.
   *
   * @param {string} dirPath - Directory to read.
   * @returns {Promise<object[]>} File records; a directory that cannot be read
   *   is logged and contributes nothing further.
   */
  async processDirectory(dirPath) {
    const files = [];

    try {
      const entries = await fs.readdir(dirPath, { withFileTypes: true });

      for (const entry of entries) {
        const fullPath = path.join(dirPath, entry.name);

        if (this.shouldExclude(entry.name)) {
          continue;
        }

        if (entry.isDirectory() && this.options.recursive) {
          const subFiles = await this.processDirectory(fullPath);
          files.push(...subFiles);
        } else if (entry.isFile()) {
          const file = await this.processFile(fullPath);
          if (file) {
            files.push(file);
          }
        }
      }
    } catch (error) {
      console.warn(`Error processing directory ${dirPath}:`, error instanceof Error ? error.message : String(error));
    }

    return files;
  }

  /**
   * Maps file records to graph node objects of type `'file'`.
   *
   * @param {object[]} files - Records as produced by `processFile`.
   * @returns {object[]} One node per file, reusing the file's `id`, `metadata`
   *   and timestamps.
   */
  filesToNodes(files) {
    return files.map(file => ({
      id: file.id,
      type: 'file',
      label: file.name,
      properties: {
        path: file.path,
        extension: file.extension,
        size: file.size,
        content: file.content,
      },
      metadata: file.metadata,
      created_at: file.created_at,
      updated_at: file.updated_at,
    }));
  }

  /**
   * Splits a file's content into overlapping chunks of `chunkSize` characters.
   * Consecutive chunks start `chunkSize - overlap` characters apart.
   *
   * @param {object} file - Record with `id`, `path`, `name` and `content`.
   * @param {number} [chunkSize=1000] - Maximum characters per chunk.
   * @param {number} [overlap=100] - Characters shared between neighbouring chunks.
   * @returns {object[]} Chunk objects; empty when the file has no content.
   * @throws {RangeError} If `overlap` is not smaller than `chunkSize`; the
   *   window would never advance and the loop would not terminate.
   */
  fileToChunks(file, chunkSize = 1000, overlap = 100) {
    if (!file.content) {
      return [];
    }

    const step = chunkSize - overlap;
    // A zero or negative step never moves the window forward, which previously
    // caused an endless loop that kept appending chunks until memory ran out.
    if (step <= 0) {
      throw new RangeError(
        `Invalid chunking parameters: overlap (${overlap}) must be smaller than chunkSize (${chunkSize})`,
      );
    }

    const chunks = [];
    const content = file.content;
    let position = 0;

    for (let i = 0; i < content.length; i += step) {
      const chunkContent = content.slice(i, i + chunkSize);
      chunks.push({
        id: uuidv4(),
        content: chunkContent,
        document_id: file.id,
        position: position++,
        entities: [],
        metadata: {
          file_path: file.path,
          file_name: file.name,
          start_char: i,
          end_char: Math.min(i + chunkSize, content.length),
        },
      });
    }

    return chunks;
  }

  /**
   * Decides whether a directory entry should be skipped.
   *
   * NOTE: the leading-dot test is evaluated inside the per-pattern callback, so
   * it only takes effect when `options.excludePatterns` has at least one entry;
   * with an empty list nothing is excluded. Patterns are matched as substrings
   * of the entry name.
   *
   * @param {string} name - Entry name (not the full path).
   * @returns {boolean} `true` when the entry should be skipped.
   */
  shouldExclude(name) {
    return this.options.excludePatterns.some(pattern => name.includes(pattern) || name.startsWith('.'));
  }

  /**
   * @param {string} extension - Extension including the leading dot.
   * @returns {boolean} Whether the extension is listed in `options.allowedExtensions`.
   */
  isTextFile(extension) {
    return this.options.allowedExtensions.includes(extension);
  }

  /**
   * Generate summary of file processing results.
   *
   * @param {object[]} files - Records as produced by `processFile`.
   * @returns {object} Totals (`total_files`, `total_size`, `average_file_size`),
   *   a per-extension count in `file_types` (files without an extension are
   *   counted under `'no_extension'`), the `largest_file` / `smallest_file`
   *   records (`null` for an empty list), the distinct `directories` as an
   *   array, and an ISO `processing_timestamp`.
   */
  generateSummary(files) {
    const summary = {
      total_files: files.length,
      file_types: {},
      total_size: 0,
      largest_file: null,
      smallest_file: null,
      directories: new Set(),
      processing_timestamp: new Date().toISOString(),
    };

    for (const file of files) {
      // Count file types
      const ext = file.extension || 'no_extension';
      summary.file_types[ext] = (summary.file_types[ext] || 0) + 1;

      // Calculate total size
      summary.total_size += file.size;

      // Track largest and smallest files
      if (!summary.largest_file || file.size > summary.largest_file.size) {
        summary.largest_file = file;
      }
      if (!summary.smallest_file || file.size < summary.smallest_file.size) {
        summary.smallest_file = file;
      }

      // Track directories
      if (file.metadata?.directory) {
        summary.directories.add(file.metadata.directory);
      }
    }

    return {
      ...summary,
      // The Set is replaced by a plain array in the returned object.
      directories: Array.from(summary.directories),
      average_file_size: files.length > 0 ? summary.total_size / files.length : 0,
    };
  }
}
//# sourceMappingURL=file-processor.js.map