UNPKG

codesummary

Version:

Cross-platform CLI tool that generates professional PDF documentation and RAG-optimized JSON outputs from project source code. Perfect for code reviews, audits, documentation, and AI/ML applications with semantic chunking and precision offsets.

374 lines (341 loc) • 10.2 kB
import fs from "fs-extra";
import yaml from "js-yaml";
import path from "path";

// Log-prefix icons, written as Unicode escapes so they cannot be damaged by a
// re-encoding of this source file.
const ICON_INFO = "\u{1F4CB}"; // clipboard
const ICON_WARN = "\u26A0\uFE0F"; // warning sign
const ICON_ERROR = "\u274C"; // cross mark

// Multipliers for the size suffixes accepted by parseFileSize().
const SIZE_UNITS = Object.freeze({
  B: 1,
  KB: 1024,
  MB: 1024 ** 2,
  GB: 1024 ** 3,
  TB: 1024 ** 4,
});

// Keys that must never be copied from user-supplied data during a deep merge.
const UNSAFE_MERGE_KEYS = new Set(["__proto__", "constructor", "prototype"]);

/**
 * Tell whether a value is a plain key/value object (not null, not an array,
 * not a class instance such as Date).
 * @param {any} value - Value to test
 * @returns {boolean} True for plain objects
 */
function isPlainObject(value) {
  if (value === null || typeof value !== "object" || Array.isArray(value)) {
    return false;
  }
  const proto = Object.getPrototypeOf(value);
  return proto === Object.prototype || proto === null;
}

/**
 * Split a path on both "/" and "\" and drop empty segments.
 * @param {string} value - Path or path pattern
 * @returns {string[]} Non-empty path segments
 */
function splitPathSegments(value) {
  return value.split(/[\\/]+/).filter(Boolean);
}

/**
 * Check whether `run` occurs as a contiguous sequence inside `segments`.
 * @param {string[]} segments - Segments of the path being tested
 * @param {string[]} run - Segments of the exclusion pattern
 * @returns {boolean} True if every segment of `run` matches consecutively
 */
function containsSegmentRun(segments, run) {
  if (run.length === 0 || run.length > segments.length) {
    return false;
  }
  for (let start = 0; start + run.length <= segments.length; start += 1) {
    if (run.every((part, offset) => segments[start + offset] === part)) {
      return true;
    }
  }
  return false;
}

/**
 * RAG Configuration Manager
 * Loads configuration from raggen.config.yaml, merges it over the built-in
 * defaults and offers helpers for validation and lookups.
 */
export class RagConfigManager {
  constructor() {
    this.defaultConfig = this.getDefaultConfig();
    this.configPath = null;
    this.loadedConfig = null;
  }

  /**
   * Load configuration from a YAML file and merge it over the defaults.
   *
   * Whenever the defaults are returned (no file, unreadable file, invalid
   * YAML, non-mapping document) `loadedConfig` and `configPath` are reset to
   * null so that get() and displayConfig() describe what is actually in use.
   *
   * @param {string} configPath - Path to config file (optional)
   * @returns {Promise<object>} Merged configuration, or the defaults
   */
  async loadConfig(configPath = null) {
    // Forget the result of any earlier load before trying again.
    this.loadedConfig = null;
    this.configPath = configPath || (await this.findConfigFile());

    const fileExists =
      Boolean(this.configPath) && (await fs.pathExists(this.configPath));
    if (!fileExists) {
      this.configPath = null;
      console.log(`${ICON_INFO} No RAG config found, using defaults`);
      return this.defaultConfig;
    }

    try {
      const yamlContent = await fs.readFile(this.configPath, "utf8");
      // NOTE(review): js-yaml >= 4 `load` uses a safe schema by default; with
      // js-yaml 3.x `load` can build JS-specific types - confirm the installed
      // version before feeding it untrusted files.
      const userConfig = yaml.load(yamlContent);

      // An empty document yields null/undefined and simply means "no overrides".
      if (userConfig != null && !isPlainObject(userConfig)) {
        throw new Error("top-level YAML value must be a mapping");
      }

      this.loadedConfig = this.mergeConfigs(this.defaultConfig, userConfig ?? {});
      console.log(`${ICON_INFO} RAG config loaded from: ${this.configPath}`);
      return this.loadedConfig;
    } catch (error) {
      console.warn(`${ICON_WARN} Error loading RAG config: ${error.message}`);
      console.log(`${ICON_INFO} Using default RAG configuration`);
      this.configPath = null;
      return this.defaultConfig;
    }
  }

  /**
   * Find the configuration file by probing a fixed list of relative paths in
   * priority order.
   * @returns {Promise<string|null>} Absolute path of the first match, or null
   */
  async findConfigFile() {
    const candidates = [
      "raggen.config.yaml",
      "raggen.config.yml",
      ".raggen.config.yaml",
      ".raggen.config.yml",
      "config/raggen.yaml",
      "config/raggen.yml",
    ];

    // Checked one at a time on purpose: the first existing candidate wins.
    for (const candidate of candidates) {
      if (await fs.pathExists(candidate)) {
        return path.resolve(candidate);
      }
    }
    return null;
  }

  /**
   * Get default configuration
   * @returns {object} A freshly built default config object
   */
  getDefaultConfig() {
    return {
      extensions: {
        include: [
          ".json",
          ".ts",
          ".js",
          ".jsx",
          ".tsx",
          ".xml",
          ".html",
          ".css",
          ".scss",
          ".md",
          ".txt",
          ".py",
          ".java",
          ".cs",
          ".cpp",
          ".c",
          ".h",
          ".yaml",
          ".yml",
          ".sh",
          ".bat",
        ],
      },
      chunking: {
        maxTokens: 1000,
        overlap: 200,
        tokenEstimation: "ceil(length/4)",
      },
      handlers: {
        code: {
          splitByFunction: true,
          detectImports: true,
          detectCalls: true,
          complexityAnalysis: true,
        },
        markup: {
          splitByElement: true,
          preserveStructure: true,
        },
        styling: {
          splitByRule: true,
          detectImports: true,
        },
        config: {
          splitBySection: true,
          validateSyntax: false,
        },
      },
      paths: {
        exclude: [
          "node_modules",
          ".git",
          "dist",
          "build",
          "coverage",
          "out",
          "__pycache__",
          ".next",
          ".nuxt",
          ".cache",
          "tmp",
          "temp",
          "logs",
          "bower_components",
          "vendor",
        ],
      },
      files: {
        exclude: [
          "*-lock.json",
          "*.lock",
          "composer.lock",
          "Pipfile.lock",
          "*.min.js",
          "*.min.css",
          "*.map",
          ".DS_Store",
          "Thumbs.db",
          "*-lock.yaml",
        ],
      },
      performance: {
        maxWorkers: 1,
        batchSize: 50,
        maxFileSize: "100MB",
        streamingThreshold: "10MB",
      },
      output: {
        format: "json",
        compression: false,
        validation: true,
        indexing: true,
      },
      metadata: {
        calculateHashes: true,
        extractTags: true,
        trackRelationships: true,
        includeStats: true,
      },
      logging: {
        level: "info",
        progressReporting: true,
        statisticsReporting: true,
      },
      quality: {
        maxChunkSize: "50KB",
        maxOutputSize: "250MB",
        duplicateDetection: true,
        emptyChunkHandling: "skip",
      },
    };
  }

  /**
   * Deep merge configuration objects without mutating either argument.
   * @param {object} defaultConfig - Default configuration
   * @param {object} userConfig - User configuration
   * @returns {object} Merged configuration
   */
  mergeConfigs(defaultConfig, userConfig) {
    // JSON round-trip gives an independent deep copy of the defaults.
    const merged = JSON.parse(JSON.stringify(defaultConfig));
    return this.deepMerge(merged, userConfig);
  }

  /**
   * Recursively merge `source` into `target` (mutates and returns `target`).
   *
   * Plain objects are merged key by key; every other value (primitives,
   * arrays, null) replaces the target value. Keys that could reach or replace
   * an object's prototype are skipped, because `source` comes from a user file.
   *
   * @param {object} target - Target object
   * @param {object} source - Source object
   * @returns {object} Merged object
   */
  deepMerge(target, source) {
    if (!isPlainObject(source)) {
      return target;
    }

    for (const key of Object.keys(source)) {
      if (UNSAFE_MERGE_KEYS.has(key)) {
        continue;
      }

      const incoming = source[key];
      if (isPlainObject(incoming)) {
        // Replace arrays/primitives on the target side instead of merging
        // mapping keys into them.
        if (!isPlainObject(target[key])) {
          target[key] = {};
        }
        this.deepMerge(target[key], incoming);
      } else {
        target[key] = incoming;
      }
    }
    return target;
  }

  /**
   * Validate configuration. All problems found are printed with
   * console.error before returning.
   * @param {object} config - Configuration to validate
   * @returns {boolean} True if valid
   */
  validateConfig(config) {
    const errors = [];

    if (config === null || typeof config !== "object") {
      errors.push("Configuration must be an object");
    } else {
      // Validate required sections
      for (const section of ["extensions", "chunking", "handlers"]) {
        if (!config[section]) {
          errors.push(`Missing required section: ${section}`);
        }
      }

      // Validate chunking settings (Number.isFinite also rejects NaN/Infinity)
      const { chunking, extensions } = config;
      if (chunking) {
        if (!Number.isFinite(chunking.maxTokens) || chunking.maxTokens <= 0) {
          errors.push("chunking.maxTokens must be a positive number");
        }
        if (!Number.isFinite(chunking.overlap) || chunking.overlap < 0) {
          errors.push("chunking.overlap must be a non-negative number");
        }
      }

      // Validate extensions
      if (extensions && extensions.include) {
        if (!Array.isArray(extensions.include)) {
          errors.push("extensions.include must be an array");
        } else {
          for (const ext of extensions.include) {
            if (typeof ext !== "string" || !ext.startsWith(".")) {
              errors.push(`Invalid extension: ${ext} (must start with dot)`);
            }
          }
        }
      }
    }

    if (errors.length === 0) {
      return true;
    }

    console.error(`${ICON_ERROR} RAG Configuration validation errors:`);
    errors.forEach((error) => console.error(` • ${error}`));
    return false;
  }

  /**
   * Get configuration value with dot notation.
   *
   * Lookup stops at the first segment that is missing (or whose parent is not
   * an object) and returns `defaultValue` untouched; later segments are never
   * applied to the default.
   *
   * @param {string} path - Configuration path (e.g., 'chunking.maxTokens');
   *   note this parameter shadows the imported `path` module inside the method
   * @param {any} defaultValue - Default value if not found
   * @returns {any} Configuration value
   */
  get(path, defaultValue = null) {
    if (typeof path !== "string") {
      return defaultValue;
    }

    let current = this.loadedConfig || this.defaultConfig;
    for (const key of path.split(".")) {
      const isContainer = current !== null && typeof current === "object";
      if (
        !isContainer ||
        !Object.prototype.hasOwnProperty.call(current, key) ||
        current[key] === undefined
      ) {
        return defaultValue;
      }
      current = current[key];
    }
    return current;
  }

  /**
   * Display current configuration (the loaded one, else the defaults).
   */
  displayConfig() {
    const config = this.loadedConfig || this.defaultConfig;
    // A user file may null out whole sections, so read them defensively.
    const extensionCount = config.extensions?.include?.length ?? 0;

    console.log(`\n${ICON_INFO} RAG Generator Configuration:`);
    console.log(` Source: ${this.configPath ? this.configPath : "defaults"}`);
    console.log(` Extensions: ${extensionCount} types`);
    console.log(` Max tokens per chunk: ${config.chunking?.maxTokens}`);
    console.log(` Token overlap: ${config.chunking?.overlap}`);
    console.log(` Max workers: ${config.performance?.maxWorkers}`);
    console.log(` Batch size: ${config.performance?.batchSize}`);
    console.log();
  }

  /**
   * Parse file size string to bytes (1 KB = 1024 B).
   * @param {string|number} sizeStr - Size string (e.g., '100MB', '1GB', '2TB');
   *   a number is returned unchanged
   * @returns {number} Size in bytes, or 0 when the input cannot be parsed
   */
  parseFileSize(sizeStr) {
    if (typeof sizeStr === "number") return sizeStr;
    if (typeof sizeStr !== "string") return 0;

    const match = sizeStr.trim().match(/^(\d+(?:\.\d+)?)\s*([KMGT]?B)$/i);
    if (!match) return 0;

    const [, amount, suffix] = match;
    return Math.floor(parseFloat(amount) * SIZE_UNITS[suffix.toUpperCase()]);
  }

  /**
   * Check if file should be excluded by path.
   *
   * A pattern matches only whole path segments ("out" excludes "out/a.js" but
   * not "src/layout.js"); multi-segment patterns such as "src/generated" must
   * match consecutive segments. Both "/" and "\" are treated as separators.
   *
   * @param {string} filePath - File path to check
   * @returns {boolean} True if should be excluded
   */
  shouldExcludePath(filePath) {
    if (typeof filePath !== "string" || filePath === "") {
      return false;
    }

    const config = this.loadedConfig || this.defaultConfig;
    const configured = config.paths?.exclude;
    const excludePaths = Array.isArray(configured) ? configured : [];
    const segments = splitPathSegments(filePath);

    return excludePaths.some((pattern) =>
      containsSegmentRun(segments, splitPathSegments(String(pattern)))
    );
  }

  /**
   * Check if file should be excluded by filename pattern.
   *
   * Patterns are simple globs: `*` matches any run of characters, every other
   * character is matched literally, and matching is case-insensitive.
   *
   * @param {string} fileName - File name to check
   * @returns {boolean} True if should be excluded
   */
  shouldExcludeFile(fileName) {
    const config = this.loadedConfig || this.defaultConfig;
    const configured = config.files?.exclude;
    const excludeFiles = Array.isArray(configured) ? configured : [];

    return excludeFiles.some((pattern) => {
      // Escape every regex metacharacter first, then turn the escaped "*"
      // back into a wildcard.
      const regexPattern = String(pattern)
        .replace(/[.*+?^${}()|[\]\\]/g, "\\$&")
        .replace(/\\\*/g, ".*");
      return new RegExp(`^${regexPattern}$`, "i").test(fileName);
    });
  }
}

export default new RagConfigManager();