UNPKG

giga-code

Version:

A personal AI CLI assistant powered by Grok for local development.

292 lines 11.3 kB
"use strict"; var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { if (k2 === undefined) k2 = k; var desc = Object.getOwnPropertyDescriptor(m, k); if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { desc = { enumerable: true, get: function() { return m[k]; } }; } Object.defineProperty(o, k2, desc); }) : (function(o, m, k, k2) { if (k2 === undefined) k2 = k; o[k2] = m[k]; })); var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { Object.defineProperty(o, "default", { enumerable: true, value: v }); }) : function(o, v) { o["default"] = v; }); var __importStar = (this && this.__importStar) || function (mod) { if (mod && mod.__esModule) return mod; var result = {}; if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k); __setModuleDefault(result, mod); return result; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.ChunkingService = void 0; const fs = __importStar(require("fs")); const path = __importStar(require("path")); const glob_1 = require("glob"); const code_parser_1 = require("../utils/code-parser"); const rag_config_1 = require("../utils/rag-config"); class ChunkingService { constructor(projectPath = process.cwd(), config) { this.projectPath = projectPath; this.config = config || rag_config_1.RAGConfigManager.loadConfig(projectPath); } async chunkProject() { const stats = { totalFiles: 0, processedFiles: 0, totalChunks: 0, skippedFiles: 0, errors: [] }; const chunks = []; try { // Find all files matching include patterns const allFiles = await this.findFiles(); stats.totalFiles = allFiles.length; // Process files in batches to avoid memory issues const batchSize = 50; for (let i = 0; i < allFiles.length; i += batchSize) { const batch = allFiles.slice(i, i + batchSize); for (const filePath of batch) { try { const fileChunks = await this.chunkFile(filePath); chunks.push(...fileChunks); stats.processedFiles++; stats.totalChunks += fileChunks.length; } catch (error) { stats.skippedFiles++; stats.errors.push(`${filePath}: ${error.message}`); } } } if (stats.errors.length > 0) { } return { chunks, stats }; } catch (error) { stats.errors.push(`Project chunking failed: ${error.message}`); throw error; } } async chunkFile(filePath) { try { // Check if file should be excluded if (this.shouldExcludeFile(filePath)) { return []; } // Check file size before reading const absolutePath = path.isAbsolute(filePath) ? filePath : path.join(this.projectPath, filePath); const stats = fs.statSync(absolutePath); const fileSizeKB = stats.size / 1024; // Skip files that exceed the configured size limit const maxFileSizeKB = this.config.maxFileSizeKB || 500; if (fileSizeKB > maxFileSizeKB) { return []; } // Read file content const content = fs.readFileSync(absolutePath, 'utf-8'); // Skip empty files if (content.trim().length === 0) { return []; } // Use logical chunking if enabled and supported if (this.config.chunkingStrategy === 'logical') { const chunks = code_parser_1.CodeParser.parseFile(filePath, content); // If logical parsing failed or produced no chunks, fall back to fixed chunking if (chunks.length === 0) { return this.createFixedChunks(filePath, content); } return chunks; } else { return this.createFixedChunks(filePath, content); } } catch (error) { throw new Error(`Failed to chunk file ${filePath}: ${error.message}`); } } async findFiles() { const allFiles = []; // Process each include pattern for (const pattern of this.config.includePatterns) { try { const files = await (0, glob_1.glob)(pattern, { cwd: this.projectPath, ignore: this.config.excludePatterns, nodir: true, absolute: false }); allFiles.push(...files); } catch (error) { } } // Remove duplicates and sort const uniqueFiles = [...new Set(allFiles)].sort(); // Limit the number of files to process const maxFiles = this.config.maxFiles || 1000; if (uniqueFiles.length > maxFiles) { return uniqueFiles.slice(0, maxFiles); } return uniqueFiles; } shouldExcludeFile(filePath) { const relativePath = path.relative(this.projectPath, filePath); // Check exclude patterns for (const pattern of this.config.excludePatterns) { // Convert glob pattern to regex for testing const regexPattern = pattern .replace(/\*\*/g, '.*') .replace(/\*/g, '[^/]*') .replace(/\?/g, '[^/]'); const regex = new RegExp(`^${regexPattern}$`); if (regex.test(relativePath) || regex.test(filePath)) { return true; } } return false; } createFixedChunks(filePath, content) { const maxChunkSize = 2000; // characters const overlap = 200; // characters overlap between chunks if (content.length <= maxChunkSize) { // File is small enough to be a single chunk return [{ id: this.generateChunkId(filePath, 'file', 'complete', 1), content, filePath, type: 'file', name: 'complete', startLine: 1, endLine: content.split('\n').length, metadata: { language: this.getLanguageFromPath(filePath), size: content.length, chunkingStrategy: 'fixed' } }]; } const chunks = []; const lines = content.split('\n'); let currentPos = 0; let chunkIndex = 1; while (currentPos < content.length) { const chunkEnd = Math.min(currentPos + maxChunkSize, content.length); const chunkContent = content.substring(currentPos, chunkEnd); // Try to end chunk at a line boundary let adjustedEnd = chunkEnd; if (chunkEnd < content.length) { const remainingContent = content.substring(chunkEnd); const nextNewline = remainingContent.indexOf('\n'); if (nextNewline !== -1 && nextNewline < 100) { adjustedEnd = chunkEnd + nextNewline; } } const finalChunkContent = content.substring(currentPos, adjustedEnd); const startLine = content.substring(0, currentPos).split('\n').length; const endLine = content.substring(0, adjustedEnd).split('\n').length; chunks.push({ id: this.generateChunkId(filePath, 'file', `chunk-${chunkIndex}`, startLine), content: finalChunkContent, filePath, type: 'file', name: `chunk-${chunkIndex}`, startLine, endLine, metadata: { language: this.getLanguageFromPath(filePath), size: finalChunkContent.length, chunkingStrategy: 'fixed', chunkIndex, totalChunks: 0 // Will be updated after all chunks are created } }); currentPos = adjustedEnd - overlap; chunkIndex++; } // Update total chunks count chunks.forEach(chunk => { chunk.metadata.totalChunks = chunks.length; }); return chunks; } generateChunkId(filePath, type, name, startLine) { const identifier = `${filePath}:${type}:${name}:${startLine}`; return Buffer.from(identifier).toString('base64').substring(0, 16); } getLanguageFromPath(filePath) { const ext = path.extname(filePath).toLowerCase(); const languageMap = { '.ts': 'typescript', '.tsx': 'typescript', '.js': 'javascript', '.jsx': 'javascript', '.py': 'python', '.java': 'java', '.cpp': 'cpp', '.c': 'c', '.h': 'c', '.go': 'go', '.rs': 'rust', '.php': 'php', '.rb': 'ruby', '.swift': 'swift', '.kt': 'kotlin', '.cs': 'csharp', '.scala': 'scala', '.clj': 'clojure', '.sh': 'shell', '.yml': 'yaml', '.yaml': 'yaml', '.json': 'json', '.md': 'markdown' }; return languageMap[ext] || 'text'; } updateConfig(newConfig) { this.config = newConfig; } getConfig() { return { ...this.config }; } async getProjectStats() { try { const allFiles = await this.findFiles(); const includedFiles = allFiles.length; // Get total files in project (rough estimate) const allProjectFiles = await (0, glob_1.glob)('**/*', { cwd: this.projectPath, nodir: true, absolute: false }); const totalFiles = allProjectFiles.length; const excludedFiles = totalFiles - includedFiles; // Estimate chunks (assume average 3 chunks per file for logical, 1 for fixed) const avgChunksPerFile = this.config.chunkingStrategy === 'logical' ? 3 : 1; const estimatedChunks = includedFiles * avgChunksPerFile; return { totalFiles, includedFiles, excludedFiles, estimatedChunks }; } catch (error) { console.error('Failed to get project stats:', error); return { totalFiles: 0, includedFiles: 0, excludedFiles: 0, estimatedChunks: 0 }; } } } exports.ChunkingService = ChunkingService; //# sourceMappingURL=chunking-service.js.map