giga-code
Version:
A personal AI CLI assistant powered by Grok for local development.
292 lines • 11.3 kB
JavaScript
;
/**
 * TypeScript interop helper: exposes property `k` of module `m` on object `o`
 * under the name `k2` (defaults to `k`). Reuses an already-installed helper
 * from `this` when one exists.
 *
 * When `Object.create` is available the property is defined through a
 * descriptor: the source descriptor is reused as-is unless it is missing, is a
 * plain data property that is writable or configurable, or is an accessor on a
 * non-ES module; in those cases an enumerable getter reading `m[k]` is
 * installed instead. Without `Object.create` the value is copied.
 */
var __createBinding = (this && this.__createBinding) || (function () {
    if (!Object.create) {
        // Legacy engines: plain one-off copy of the current value.
        return function (o, m, k, k2) {
            var targetKey = k2 === undefined ? k : k2;
            o[targetKey] = m[k];
        };
    }
    return function (o, m, k, k2) {
        var targetKey = k2 === undefined ? k : k2;
        var descriptor = Object.getOwnPropertyDescriptor(m, k);
        var needsGetter;
        if (!descriptor) {
            needsGetter = true;
        }
        else if ("get" in descriptor) {
            needsGetter = !m.__esModule;
        }
        else {
            needsGetter = descriptor.writable || descriptor.configurable;
        }
        if (needsGetter) {
            descriptor = { enumerable: true, get: function () { return m[k]; } };
        }
        Object.defineProperty(o, targetKey, descriptor);
    };
})();
/**
 * TypeScript interop helper: stores `v` as the `default` member of `o`.
 * Reuses an already-installed helper from `this` when one exists. With
 * `Object.create` available the member is defined as an enumerable data
 * property via `Object.defineProperty`; otherwise it is assigned directly.
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
    if (Object.create) {
        return function (o, v) {
            Object.defineProperty(o, "default", { enumerable: true, value: v });
        };
    }
    return function (o, v) {
        o["default"] = v;
    };
})();
/**
 * TypeScript interop helper backing `import * as x from "..."`.
 * Reuses an already-installed helper from `this` when one exists.
 *
 * A module already flagged `__esModule` is returned untouched. Anything else
 * is wrapped in a fresh object that re-exposes every own, non-"default"
 * property through `__createBinding` and carries the original module as its
 * `default` member.
 */
var __importStar = (this && this.__importStar) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    var namespace = {};
    if (mod != null) {
        var hasOwn = Object.prototype.hasOwnProperty;
        for (var key in mod) {
            if (key === "default" || !hasOwn.call(mod, key)) {
                continue;
            }
            __createBinding(namespace, mod, key);
        }
    }
    __setModuleDefault(namespace, mod);
    return namespace;
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.ChunkingService = void 0;
const crypto = __importStar(require("crypto"));
const fs = __importStar(require("fs"));
const path = __importStar(require("path"));
const glob_1 = require("glob");
const code_parser_1 = require("../utils/code-parser");
const rag_config_1 = require("../utils/rag-config");
/**
 * Discovers the files of a project and splits them into chunks, either by
 * delegating to `CodeParser.parseFile` ("logical" strategy) or by cutting
 * fixed-size, overlapping character windows ("fixed" strategy / fallback).
 */
class ChunkingService {
    /**
     * @param {string} [projectPath=process.cwd()] - Root directory that globs
     *   and relative file paths are resolved against.
     * @param {object} [config] - Chunking configuration. When omitted it is
     *   loaded with `RAGConfigManager.loadConfig(projectPath)`. Fields read by
     *   this class: `includePatterns`, `excludePatterns`, `chunkingStrategy`,
     *   `maxFileSizeKB`, `maxFiles`.
     */
    constructor(projectPath = process.cwd(), config) {
        this.projectPath = projectPath;
        this.config = config || rag_config_1.RAGConfigManager.loadConfig(projectPath);
    }

    /**
     * Chunks every file returned by `findFiles()`.
     *
     * A file whose chunking throws is counted in `skippedFiles` and its
     * message is recorded in `errors`; the run continues with the next file.
     * Files that yield no chunks (excluded, too large, empty) still count as
     * processed. A failure of file discovery itself propagates to the caller.
     *
     * @returns {Promise<{chunks: object[], stats: {totalFiles: number,
     *   processedFiles: number, totalChunks: number, skippedFiles: number,
     *   errors: string[]}}>}
     */
    async chunkProject() {
        const stats = {
            totalFiles: 0,
            processedFiles: 0,
            totalChunks: 0,
            skippedFiles: 0,
            errors: []
        };
        const chunks = [];
        const allFiles = await this.findFiles();
        stats.totalFiles = allFiles.length;
        for (const filePath of allFiles) {
            try {
                const fileChunks = await this.chunkFile(filePath);
                // Push one by one: spreading a very large array into push()
                // can exceed the engine's argument limit.
                for (const chunk of fileChunks) {
                    chunks.push(chunk);
                }
                stats.processedFiles++;
                stats.totalChunks += fileChunks.length;
            }
            catch (error) {
                const reason = error instanceof Error ? error.message : String(error);
                stats.skippedFiles++;
                stats.errors.push(`${filePath}: ${reason}`);
            }
        }
        return { chunks, stats };
    }

    /**
     * Chunks a single file.
     *
     * Returns an empty array when the file matches an exclude pattern, is
     * larger than `config.maxFileSizeKB` (500 KB when unset), or contains only
     * whitespace. With the "logical" strategy the result of
     * `CodeParser.parseFile` is used, falling back to fixed-size chunks when
     * it produces nothing.
     *
     * @param {string} filePath - Absolute path, or path relative to the project root.
     * @returns {Promise<object[]>} The chunks of the file.
     * @throws {Error} Wrapping any failure (e.g. unreadable file) with the file path.
     */
    async chunkFile(filePath) {
        try {
            if (this.shouldExcludeFile(filePath)) {
                return [];
            }
            const absolutePath = path.isAbsolute(filePath) ? filePath : path.join(this.projectPath, filePath);
            // Check the size before reading so oversized files are never loaded.
            const fileSizeKB = fs.statSync(absolutePath).size / 1024;
            const maxFileSizeKB = this.config.maxFileSizeKB || 500;
            if (fileSizeKB > maxFileSizeKB) {
                return [];
            }
            const content = fs.readFileSync(absolutePath, 'utf-8');
            if (content.trim().length === 0) {
                return [];
            }
            if (this.config.chunkingStrategy === 'logical') {
                const logicalChunks = code_parser_1.CodeParser.parseFile(filePath, content);
                if (logicalChunks.length > 0) {
                    return logicalChunks;
                }
                // Logical parsing produced nothing: fall through to fixed chunking.
            }
            return this.createFixedChunks(filePath, content);
        }
        catch (error) {
            // The `cause` option is ignored by runtimes that do not support it.
            throw new Error(`Failed to chunk file ${filePath}: ${error.message}`, { cause: error });
        }
    }

    /**
     * Expands `config.includePatterns` (minus `config.excludePatterns`)
     * relative to the project root.
     *
     * @returns {Promise<string[]>} De-duplicated, sorted, project-relative
     *   file paths, truncated to `config.maxFiles` entries (1000 when unset).
     */
    async findFiles() {
        const allFiles = [];
        for (const pattern of this.config.includePatterns) {
            try {
                const files = await (0, glob_1.glob)(pattern, {
                    cwd: this.projectPath,
                    ignore: this.config.excludePatterns,
                    nodir: true,
                    absolute: false
                });
                for (const file of files) {
                    allFiles.push(file);
                }
            }
            catch (error) {
                // Deliberately best-effort: a pattern that fails to expand is
                // skipped so the remaining patterns still contribute files.
            }
        }
        const uniqueFiles = [...new Set(allFiles)].sort();
        const maxFiles = this.config.maxFiles || 1000;
        return uniqueFiles.length > maxFiles ? uniqueFiles.slice(0, maxFiles) : uniqueFiles;
    }

    /**
     * Tells whether a file matches any of `config.excludePatterns`.
     *
     * Each pattern is tested against both the project-relative path and the
     * path exactly as given, using forward slashes as separators.
     *
     * @param {string} filePath - Absolute path, or path relative to the project root.
     * @returns {boolean} `true` when the file must be skipped.
     */
    shouldExcludeFile(filePath) {
        // Resolve relative paths against the project root (not the process
        // cwd) so the result does not depend on where the process was started.
        const absolutePath = path.isAbsolute(filePath) ? filePath : path.join(this.projectPath, filePath);
        const relativePath = path.relative(this.projectPath, absolutePath).split(path.sep).join('/');
        const givenPath = filePath.split(path.sep).join('/');
        for (const pattern of this.config.excludePatterns) {
            const regex = this.globToRegExp(pattern);
            if (regex.test(relativePath) || regex.test(givenPath)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Converts a glob pattern into an anchored regular expression.
     *
     * Supported syntax: `**` (any characters, including `/`), a leading or
     * embedded `**` followed by `/` (zero or more whole directories), `*`
     * (any characters except `/`) and `?` (one character except `/`).
     * Regex metacharacters are escaped so e.g. `.` only matches a dot.
     * Square brackets are passed through unchanged, so a bracket expression
     * keeps acting as a regex character class.
     *
     * @param {string} pattern - Glob pattern.
     * @returns {RegExp} Expression matching whole paths only.
     */
    globToRegExp(pattern) {
        let source = '';
        for (let i = 0; i < pattern.length; i++) {
            const ch = pattern[i];
            if (ch === '*') {
                if (pattern[i + 1] === '*') {
                    i++;
                    if (pattern[i + 1] === '/') {
                        i++;
                        source += '(?:.*/)?';
                    }
                    else {
                        source += '.*';
                    }
                }
                else {
                    source += '[^/]*';
                }
            }
            else if (ch === '?') {
                source += '[^/]';
            }
            else if (/[.+^${}()|\\]/.test(ch)) {
                source += `\\${ch}`;
            }
            else {
                source += ch;
            }
        }
        return new RegExp(`^${source}$`);
    }

    /**
     * Splits content into windows of roughly 2000 characters that overlap by
     * 200 characters. A window is extended to the next newline when one
     * occurs within 100 characters of its nominal end, so chunks tend to end
     * on line boundaries. Content of at most 2000 characters becomes a single
     * chunk named "complete".
     *
     * @param {string} filePath - Path stored on every chunk and used for the id and language.
     * @param {string} content - Full text of the file.
     * @returns {object[]} Chunks with `id`, `content`, `filePath`, `type`,
     *   `name`, 1-based `startLine`/`endLine` and `metadata`.
     */
    createFixedChunks(filePath, content) {
        const maxChunkSize = 2000; // characters
        const overlap = 200; // characters shared by consecutive chunks
        const maxBoundarySearch = 100; // how far past the window to look for a newline
        const language = this.getLanguageFromPath(filePath);
        const countNewlines = (text) => text.split('\n').length - 1;
        if (content.length <= maxChunkSize) {
            return [{
                    id: this.generateChunkId(filePath, 'file', 'complete', 1),
                    content,
                    filePath,
                    type: 'file',
                    name: 'complete',
                    startLine: 1,
                    endLine: countNewlines(content) + 1,
                    metadata: {
                        language,
                        size: content.length,
                        chunkingStrategy: 'fixed'
                    }
                }];
        }
        const chunks = [];
        let currentPos = 0;
        let startLine = 1; // line number of the character at currentPos
        let chunkIndex = 1;
        while (currentPos < content.length) {
            const chunkEnd = Math.min(currentPos + maxChunkSize, content.length);
            let adjustedEnd = chunkEnd;
            if (chunkEnd < content.length) {
                const nextNewline = content.indexOf('\n', chunkEnd);
                if (nextNewline !== -1 && nextNewline - chunkEnd < maxBoundarySearch) {
                    adjustedEnd = nextNewline;
                }
            }
            const chunkContent = content.substring(currentPos, adjustedEnd);
            chunks.push({
                id: this.generateChunkId(filePath, 'file', `chunk-${chunkIndex}`, startLine),
                content: chunkContent,
                filePath,
                type: 'file',
                name: `chunk-${chunkIndex}`,
                startLine,
                endLine: startLine + countNewlines(chunkContent),
                metadata: {
                    language,
                    size: chunkContent.length,
                    chunkingStrategy: 'fixed',
                    chunkIndex,
                    totalChunks: 0 // filled in once the final count is known
                }
            });
            // The chunk that reaches the end of the content is the last one.
            // Stepping back by the overlap here would re-emit the same tail
            // forever.
            if (adjustedEnd >= content.length) {
                break;
            }
            // Always move forward, even if the constants above are changed.
            const nextPos = Math.max(adjustedEnd - overlap, currentPos + 1);
            startLine += countNewlines(content.substring(currentPos, nextPos));
            currentPos = nextPos;
            chunkIndex++;
        }
        for (const chunk of chunks) {
            chunk.metadata.totalChunks = chunks.length;
        }
        return chunks;
    }

    /**
     * Builds a deterministic 16-character id for a chunk.
     *
     * The id is a prefix of the SHA-256 digest of the full
     * `filePath:type:name:startLine` identifier, so every component
     * contributes to it.
     *
     * @param {string} filePath
     * @param {string} type
     * @param {string} name
     * @param {number} startLine
     * @returns {string} 16 lowercase hexadecimal characters.
     */
    generateChunkId(filePath, type, name, startLine) {
        const identifier = `${filePath}:${type}:${name}:${startLine}`;
        return crypto.createHash('sha256').update(identifier).digest('hex').substring(0, 16);
    }

    /**
     * Maps a file extension (case-insensitive) to a language name.
     *
     * @param {string} filePath
     * @returns {string} Language name, or `'text'` for unknown extensions.
     */
    getLanguageFromPath(filePath) {
        const ext = path.extname(filePath).toLowerCase();
        const languageMap = {
            '.ts': 'typescript',
            '.tsx': 'typescript',
            '.js': 'javascript',
            '.jsx': 'javascript',
            '.py': 'python',
            '.java': 'java',
            '.cpp': 'cpp',
            '.c': 'c',
            '.h': 'c',
            '.go': 'go',
            '.rs': 'rust',
            '.php': 'php',
            '.rb': 'ruby',
            '.swift': 'swift',
            '.kt': 'kotlin',
            '.cs': 'csharp',
            '.scala': 'scala',
            '.clj': 'clojure',
            '.sh': 'shell',
            '.yml': 'yaml',
            '.yaml': 'yaml',
            '.json': 'json',
            '.md': 'markdown'
        };
        return languageMap[ext] || 'text';
    }

    /**
     * Replaces the active configuration.
     *
     * @param {object} newConfig
     */
    updateConfig(newConfig) {
        this.config = newConfig;
    }

    /**
     * @returns {object} A shallow copy of the active configuration.
     */
    getConfig() {
        return { ...this.config };
    }

    /**
     * Produces rough project statistics without reading any file content.
     *
     * `totalFiles` counts everything matched by a plain `**` + `/*` glob under
     * the project root, `includedFiles` is the size of `findFiles()`, and
     * `estimatedChunks` assumes 3 chunks per file for the "logical" strategy
     * and 1 otherwise. On failure the error is logged and all counters are 0.
     *
     * @returns {Promise<{totalFiles: number, includedFiles: number,
     *   excludedFiles: number, estimatedChunks: number}>}
     */
    async getProjectStats() {
        try {
            const includedFiles = (await this.findFiles()).length;
            const allProjectFiles = await (0, glob_1.glob)('**/*', {
                cwd: this.projectPath,
                nodir: true,
                absolute: false
            });
            const totalFiles = allProjectFiles.length;
            // The two globs use different options, so the included set is not
            // guaranteed to be a subset of the total; never report a negative count.
            const excludedFiles = Math.max(0, totalFiles - includedFiles);
            const avgChunksPerFile = this.config.chunkingStrategy === 'logical' ? 3 : 1;
            return {
                totalFiles,
                includedFiles,
                excludedFiles,
                estimatedChunks: includedFiles * avgChunksPerFile
            };
        }
        catch (error) {
            console.error('Failed to get project stats:', error);
            return {
                totalFiles: 0,
                includedFiles: 0,
                excludedFiles: 0,
                estimatedChunks: 0
            };
        }
    }
}
exports.ChunkingService = ChunkingService;
//# sourceMappingURL=chunking-service.js.map