codesummary
Version:
Cross-platform CLI tool that generates professional PDF documentation and RAG-optimized JSON outputs from project source code. Perfect for code reviews, audits, documentation, and AI/ML applications with semantic chunking and precision offsets.
374 lines (341 loc) • 10.2 kB
JavaScript
import fs from "fs-extra";
import yaml from "js-yaml";
import path from "path";
/**
 * RAG Configuration Manager
 *
 * Builds the effective RAG generator configuration by layering an optional
 * user YAML file (raggen.config.yaml and variants) on top of built-in
 * defaults. Also offers helpers to validate a configuration, read values by
 * dot path, parse size strings and apply the path/file exclusion lists.
 */
export class RagConfigManager {
  constructor() {
    this.defaultConfig = this.getDefaultConfig();
    this.configPath = null;
    this.loadedConfig = null;
  }

  /**
   * Load configuration from a YAML file and merge it over the defaults.
   * Falls back to the default configuration when no file is found or when
   * reading/parsing the file fails (the failure is logged, not thrown).
   * @param {string} configPath - Path to config file (optional)
   * @returns {object} Merged configuration, or the defaults on fallback
   */
  async loadConfig(configPath = null) {
    // Drop any previously loaded file first, so a failed reload cannot leave
    // a stale configuration active while this call reports defaults.
    this.loadedConfig = null;
    this.configPath = configPath || (await this.findConfigFile());

    if (!this.configPath || !(await fs.pathExists(this.configPath))) {
      console.log(`š No RAG config found, using defaults`);
      return this.defaultConfig;
    }

    try {
      const yamlContent = await fs.readFile(this.configPath, "utf8");
      // NOTE(review): `yaml.load` is only safe-by-default on js-yaml >= 4.
      // On js-yaml 3.x it can construct arbitrary JS types from the file;
      // confirm the installed version or switch to an explicit safe schema.
      const userConfig = yaml.load(yamlContent);
      this.loadedConfig = this.mergeConfigs(this.defaultConfig, userConfig);
      console.log(`š RAG config loaded from: ${this.configPath}`);
      return this.loadedConfig;
    } catch (error) {
      console.warn(`ā ļø Error loading RAG config: ${error.message}`);
      console.log(`š Using default RAG configuration`);
      return this.defaultConfig;
    }
  }

  /**
   * Find a configuration file in common locations, resolved against the
   * current working directory. The first existing candidate wins.
   * @returns {string|null} Absolute path to config file or null
   */
  async findConfigFile() {
    const searchPaths = [
      "raggen.config.yaml",
      "raggen.config.yml",
      ".raggen.config.yaml",
      ".raggen.config.yml",
      "config/raggen.yaml",
      "config/raggen.yml",
    ];
    for (const searchPath of searchPaths) {
      if (await fs.pathExists(searchPath)) {
        return path.resolve(searchPath);
      }
    }
    return null;
  }

  /**
   * Get default configuration. A fresh object is built on every call.
   * @returns {object} Default config
   */
  getDefaultConfig() {
    return {
      extensions: {
        include: [
          ".json",
          ".ts",
          ".js",
          ".jsx",
          ".tsx",
          ".xml",
          ".html",
          ".css",
          ".scss",
          ".md",
          ".txt",
          ".py",
          ".java",
          ".cs",
          ".cpp",
          ".c",
          ".h",
          ".yaml",
          ".yml",
          ".sh",
          ".bat",
        ],
      },
      chunking: {
        maxTokens: 1000,
        overlap: 200,
        tokenEstimation: "ceil(length/4)",
      },
      handlers: {
        code: {
          splitByFunction: true,
          detectImports: true,
          detectCalls: true,
          complexityAnalysis: true,
        },
        markup: {
          splitByElement: true,
          preserveStructure: true,
        },
        styling: {
          splitByRule: true,
          detectImports: true,
        },
        config: {
          splitBySection: true,
          validateSyntax: false,
        },
      },
      paths: {
        exclude: [
          "node_modules",
          ".git",
          "dist",
          "build",
          "coverage",
          "out",
          "__pycache__",
          ".next",
          ".nuxt",
          ".cache",
          "tmp",
          "temp",
          "logs",
          "bower_components",
          "vendor",
        ],
      },
      files: {
        exclude: [
          "*-lock.json",
          "*.lock",
          "composer.lock",
          "Pipfile.lock",
          "*.min.js",
          "*.min.css",
          "*.map",
          ".DS_Store",
          "Thumbs.db",
          "*-lock.yaml",
        ],
      },
      performance: {
        maxWorkers: 1,
        batchSize: 50,
        maxFileSize: "100MB",
        streamingThreshold: "10MB",
      },
      output: {
        format: "json",
        compression: false,
        validation: true,
        indexing: true,
      },
      metadata: {
        calculateHashes: true,
        extractTags: true,
        trackRelationships: true,
        includeStats: true,
      },
      logging: {
        level: "info",
        progressReporting: true,
        statisticsReporting: true,
      },
      quality: {
        maxChunkSize: "50KB",
        maxOutputSize: "250MB",
        duplicateDetection: true,
        emptyChunkHandling: "skip",
      },
    };
  }

  /**
   * Deep merge configuration objects without mutating `defaultConfig`.
   * A `userConfig` that is not a plain object (empty file, scalar or list
   * document) contributes nothing, so the result is a copy of the defaults.
   * @param {object} defaultConfig - Default configuration
   * @param {object} userConfig - User configuration
   * @returns {object} Merged configuration
   */
  mergeConfigs(defaultConfig, userConfig) {
    // JSON round-trip is a sufficient deep clone: the defaults are plain data.
    const merged = JSON.parse(JSON.stringify(defaultConfig));
    return this.deepMerge(merged, userConfig);
  }

  /**
   * Recursively merge `source` into `target` (target is mutated).
   * Plain objects are merged key by key; primitives and arrays from
   * `source` replace the target value outright.
   * @param {object} target - Target object
   * @param {object} source - Source object
   * @returns {object} Merged object (the same `target` reference)
   */
  deepMerge(target, source) {
    const isPlainObject = (value) =>
      value !== null && typeof value === "object" && !Array.isArray(value);

    // Strings and arrays expose index keys; merging them would inject
    // garbage entries such as "0", "1" into the configuration.
    if (!isPlainObject(source)) {
      return target;
    }

    for (const key of Object.keys(source)) {
      // Never follow keys that reach the prototype chain: merging into
      // target.__proto__ would modify Object.prototype for the whole process.
      if (key === "__proto__" || key === "constructor" || key === "prototype") {
        continue;
      }
      const value = source[key];
      if (isPlainObject(value)) {
        // An array or primitive on the target side is replaced, not merged.
        if (!isPlainObject(target[key])) {
          target[key] = {};
        }
        this.deepMerge(target[key], value);
      } else {
        target[key] = value;
      }
    }
    return target;
  }

  /**
   * Validate configuration. Every problem found is printed with
   * console.error; nothing is thrown.
   * @param {object} config - Configuration to validate
   * @returns {boolean} True if valid
   */
  validateConfig(config) {
    const errors = [];

    if (!config || typeof config !== "object") {
      errors.push("Configuration must be an object");
    } else {
      // Validate required sections
      const requiredSections = ["extensions", "chunking", "handlers"];
      for (const section of requiredSections) {
        if (!config[section]) {
          errors.push(`Missing required section: ${section}`);
        }
      }

      // Validate chunking settings. The positive-form comparisons also
      // reject NaN, which `typeof` alone reports as a number.
      if (config.chunking) {
        const { maxTokens, overlap } = config.chunking;
        if (!(typeof maxTokens === "number" && maxTokens > 0)) {
          errors.push("chunking.maxTokens must be a positive number");
        }
        if (!(typeof overlap === "number" && overlap >= 0)) {
          errors.push("chunking.overlap must be a non-negative number");
        }
      }

      // Validate extensions
      if (config.extensions && config.extensions.include) {
        if (!Array.isArray(config.extensions.include)) {
          errors.push("extensions.include must be an array");
        } else {
          for (const ext of config.extensions.include) {
            if (typeof ext !== "string" || !ext.startsWith(".")) {
              errors.push(`Invalid extension: ${ext} (must start with dot)`);
            }
          }
        }
      }
    }

    if (errors.length > 0) {
      console.error("ā RAG Configuration validation errors:");
      errors.forEach((error) => console.error(` ⢠${error}`));
      return false;
    }
    return true;
  }

  /**
   * Get configuration value with dot notation. Reads the loaded
   * configuration when there is one, otherwise the defaults.
   * @param {string} path - Configuration path (e.g., 'chunking.maxTokens');
   *   note this parameter shadows the imported `path` module in this method
   * @param {any} defaultValue - Default value if not found
   * @returns {any} Configuration value, or `defaultValue` when any segment
   *   of the path is missing or undefined
   */
  get(path, defaultValue = null) {
    if (typeof path !== "string") {
      return defaultValue;
    }
    let current = this.loadedConfig || this.defaultConfig;
    for (const key of path.split(".")) {
      // Stop at the first miss. Continuing would index into the default
      // value itself (e.g. "missing.length" with default "abc" gave 3).
      if (
        current === null ||
        typeof current !== "object" ||
        !Object.prototype.hasOwnProperty.call(current, key)
      ) {
        return defaultValue;
      }
      current = current[key];
      if (current === undefined) {
        return defaultValue;
      }
    }
    return current;
  }

  /**
   * Display current configuration on the console.
   */
  displayConfig() {
    const config = this.loadedConfig || this.defaultConfig;
    // configPath can be set even though loading failed or the file did not
    // exist; only name it as the source when a file was actually loaded.
    const source =
      this.loadedConfig && this.configPath ? this.configPath : "defaults";
    console.log("\nš RAG Generator Configuration:");
    console.log(` Source: ${source}`);
    console.log(` Extensions: ${config.extensions?.include?.length ?? 0} types`);
    console.log(` Max tokens per chunk: ${config.chunking?.maxTokens}`);
    console.log(` Token overlap: ${config.chunking?.overlap}`);
    console.log(` Max workers: ${config.performance?.maxWorkers}`);
    console.log(` Batch size: ${config.performance?.batchSize}`);
    console.log();
  }

  /**
   * Parse file size string to bytes (binary units, 1KB = 1024 bytes).
   * @param {string|number} sizeStr - Size string (e.g., '100MB', '1GB');
   *   a number is returned unchanged
   * @returns {number} Size in bytes, or 0 when the value cannot be parsed
   */
  parseFileSize(sizeStr) {
    if (typeof sizeStr === "number") return sizeStr;
    if (typeof sizeStr !== "string") return 0;

    // Every unit the regex below can capture must have an entry here.
    const units = {
      B: 1,
      KB: 1024,
      MB: 1024 ** 2,
      GB: 1024 ** 3,
      TB: 1024 ** 4,
    };
    const match = sizeStr.trim().match(/^(\d+(?:\.\d+)?)\s*([KMGT]?B)$/i);
    if (!match) return 0;

    const value = parseFloat(match[1]);
    const unit = match[2].toUpperCase();
    return Math.floor(value * units[unit]);
  }

  /**
   * Check if file should be excluded by path. An exclude entry matches when
   * it equals one whole path segment (or a run of consecutive segments for
   * entries containing a separator); both "/" and "\" count as separators.
   * @param {string} filePath - File path to check
   * @returns {boolean} True if should be excluded
   */
  shouldExcludePath(filePath) {
    if (typeof filePath !== "string" || filePath === "") return false;

    const config = this.loadedConfig || this.defaultConfig;
    const excludePaths = config.paths?.exclude;
    if (!Array.isArray(excludePaths)) return false;

    // Normalise to "/seg/seg/" so that a plain substring test can only match
    // on segment boundaries ("out" must not hit "layout" or "about.js").
    const normalize = (value) => value.split(/[\\/]+/).filter(Boolean).join("/");
    const haystack = `/${normalize(filePath)}/`;

    return excludePaths.some((pattern) => {
      if (typeof pattern !== "string") return false;
      const needle = normalize(pattern);
      return needle !== "" && haystack.includes(`/${needle}/`);
    });
  }

  /**
   * Check if file should be excluded by filename pattern. Patterns are
   * simple globs matched case-insensitively against the whole name:
   * "*" matches any run of characters, "?" matches exactly one character,
   * everything else is literal.
   * @param {string} fileName - File name to check
   * @returns {boolean} True if should be excluded
   */
  shouldExcludeFile(fileName) {
    if (typeof fileName !== "string") return false;

    const config = this.loadedConfig || this.defaultConfig;
    const excludeFiles = config.files?.exclude;
    if (!Array.isArray(excludeFiles)) return false;

    return excludeFiles.some((pattern) => {
      if (typeof pattern !== "string") return false;
      // Escape every regex metacharacter except the glob wildcards, so names
      // containing "+", "(", "[" etc. are literal and cannot break the regex.
      const regexPattern = pattern
        .replace(/[.+^${}()|[\]\\]/g, "\\$&")
        .replace(/\*/g, ".*")
        .replace(/\?/g, ".");
      return new RegExp(`^${regexPattern}$`, "i").test(fileName);
    });
  }
}
// Module-wide shared instance: importing the default export yields this one
// pre-constructed manager rather than a new one per importer.
const sharedRagConfigManager = new RagConfigManager();
export default sharedRagConfigManager;