UNPKG

code-indexer

Version:

A code indexing service using MCP, Ollama, and Qdrant.

497 lines (449 loc) 14.1 kB
/** * Exclusion Configuration Module * Handles loading and parsing of indexer exclusion configuration */ import * as fs from 'fs' import * as path from 'path' import { z } from 'zod' import { getLogger } from '../logger.js' const logger = getLogger('ExclusionConfig') /** * Schema for size limits configuration */ const SizeLimitsSchema = z.object({ max_file_size_mb: z.number().positive().optional().default(50), max_file_size_bytes: z.number().positive().optional().default(52428800), }) /** * Schema for content-based exclusion rules */ const ContentBasedSchema = z.object({ binary_files: z.boolean().optional().default(true), empty_files: z.boolean().optional().default(false), minified_files: z.boolean().optional().default(true), }) /** * Schema for language-specific exclusions */ const LanguageSpecificSchema = z.object({ folders: z.array(z.string()).optional().default([]), files: z.array(z.string()).optional().default([]), }) /** * Schema for custom rules */ const CustomRulesSchema = z.object({ exclude_test_files: z.boolean().optional().default(false), exclude_documentation: z.boolean().optional().default(false), exclude_config_files: z.boolean().optional().default(false), exclude_sample_data: z.boolean().optional().default(true), }) /** * Main exclusion configuration schema */ const ExclusionConfigSchema = z.object({ $schema: z.string().optional(), title: z.string().optional(), description: z.string().optional(), version: z.string().optional(), exclusions: z.object({ folders: z.object({ description: z.string().optional(), patterns: z.array(z.string()).default([]), }), files: z.object({ description: z.string().optional(), patterns: z.array(z.string()).default([]), }), extensions: z.object({ description: z.string().optional(), patterns: z.array(z.string()).default([]), }), exact_names: z.object({ description: z.string().optional(), patterns: z.array(z.string()).default([]), }), size_limits: SizeLimitsSchema.optional(), content_based: ContentBasedSchema.optional(), }), inclusion_overrides: z.object({ description: z.string().optional(), patterns: z.array(z.string()).default([]), }).optional(), language_specific: z.record(z.string(), LanguageSpecificSchema).optional(), custom_rules: CustomRulesSchema.optional(), }) /** * Type definitions */ export type ExclusionConfig = z.infer<typeof ExclusionConfigSchema> export type SizeLimits = z.infer<typeof SizeLimitsSchema> export type ContentBasedRules = z.infer<typeof ContentBasedSchema> export type LanguageSpecific = z.infer<typeof LanguageSpecificSchema> export type CustomRules = z.infer<typeof CustomRulesSchema> /** * Default exclusion configuration */ const DEFAULT_EXCLUSION_CONFIG: ExclusionConfig = { exclusions: { folders: { patterns: [ 'node_modules/**', '.git/**', 'dist/**', 'build/**', 'coverage/**', '__pycache__/**', '.venv/**', 'venv/**', 'target/**', 'bin/**', 'obj/**', 'tmp/**', 'temp/**', 'logs/**', ], }, files: { patterns: [ '*.log', '*.tmp', '*.temp', '*.cache', '*.pid', '*.lock', '*.swp', '*.swo', '*~', '.DS_Store', 'Thumbs.db', '*.min.js', '*.min.css', '*.map', 'package-lock.json', 'yarn.lock', '*.pyc', '*.class', '*.jar', '*.dll', '*.exe', '*.so', '*.o', '*.obj', ], }, extensions: { patterns: ['log', 'tmp', 'temp', 'cache', 'bak', 'backup', 'old', 'swp', 'swo', 'pid', 'lock'], }, exact_names: { patterns: [ '.DS_Store', 'Thumbs.db', 'desktop.ini', '.gitkeep', '.eslintcache', '.stylelintcache', 'npm-debug.log', 'yarn-debug.log', 'yarn-error.log', ], }, size_limits: { max_file_size_mb: 50, max_file_size_bytes: 52428800, }, content_based: { binary_files: true, empty_files: false, minified_files: true, }, }, inclusion_overrides: { patterns: [ 'README.*', 'LICENSE*', 'CHANGELOG.*', 'CONTRIBUTING.*', '*.md', '*.txt', 'Dockerfile*', 'Makefile', ], }, custom_rules: { exclude_test_files: false, exclude_documentation: false, exclude_config_files: false, exclude_sample_data: true, }, } /** * Exclusion configuration manager */ export class ExclusionConfigManager { private config: ExclusionConfig private configPath: string | null = null constructor(config?: ExclusionConfig) { this.config = config || DEFAULT_EXCLUSION_CONFIG } /** * Load exclusion configuration from file */ static async loadFromFile(filePath: string): Promise<ExclusionConfigManager> { try { logger.info('Loading exclusion configuration', { filePath }) // Check if file exists if (!fs.existsSync(filePath)) { logger.warn('Exclusion config file not found, using defaults', { filePath, fallbackReason: 'File does not exist' }) return new ExclusionConfigManager() } // Check file permissions try { await fs.promises.access(filePath, fs.constants.R_OK) } catch (accessError) { logger.error('Cannot read exclusion config file', { filePath, error: accessError instanceof Error ? accessError.message : 'Permission denied' }) return new ExclusionConfigManager() } // Read and parse file let fileContent: string try { fileContent = await fs.promises.readFile(filePath, 'utf-8') } catch (readError) { logger.error('Failed to read exclusion config file', { filePath, error: readError instanceof Error ? readError.message : 'Read error' }) return new ExclusionConfigManager() } // Parse JSON let rawConfig: any try { rawConfig = JSON.parse(fileContent) } catch (parseError) { logger.error('Invalid JSON in exclusion config file', { filePath, error: parseError instanceof Error ? parseError.message : 'JSON parse error', hint: 'Check for syntax errors in the configuration file' }) return new ExclusionConfigManager() } // Lenient cleanup of common doc fields before validation try { if (rawConfig && typeof rawConfig === 'object') { // Drop language_specific.description if present (docs string often included) if ( rawConfig.language_specific && typeof rawConfig.language_specific.description === 'string' ) { logger.info('Removing non-schema field language_specific.description from exclusion config') delete rawConfig.language_specific.description } // Filter out any non-object entries under language_specific if (rawConfig.language_specific && typeof rawConfig.language_specific === 'object') { for (const [key, value] of Object.entries(rawConfig.language_specific)) { if (typeof value !== 'object' || value === null) { logger.warn('Removing invalid language_specific entry (expected object with folders/files arrays)', { key }) delete (rawConfig.language_specific as any)[key] } } } } } catch (cleanupError) { logger.warn('Exclusion config cleanup encountered an issue', { error: cleanupError instanceof Error ? cleanupError.message : 'Unknown error', }) } // Validate configuration against schema let validatedConfig: ExclusionConfig try { validatedConfig = ExclusionConfigSchema.parse(rawConfig) } catch (validationError) { logger.error('Invalid exclusion configuration schema', { filePath, error: validationError instanceof Error ? validationError.message : 'Validation error', hint: 'Check the configuration file format against the schema' }) return new ExclusionConfigManager() } // Validate patterns const validationResult = ExclusionConfigManager.validatePatterns(validatedConfig) if (!validationResult.isValid) { logger.warn('Some exclusion patterns are invalid', { filePath, invalidPatterns: validationResult.invalidPatterns, hint: 'Invalid patterns will be ignored' }) } logger.info('Successfully loaded exclusion configuration', { filePath, folderPatterns: validatedConfig.exclusions.folders.patterns.length, filePatterns: validatedConfig.exclusions.files.patterns.length, extensionPatterns: validatedConfig.exclusions.extensions.patterns.length, exactNamePatterns: validatedConfig.exclusions.exact_names.patterns.length, validPatterns: validationResult.validPatterns, invalidPatterns: validationResult.invalidPatterns }) const manager = new ExclusionConfigManager(validatedConfig) manager.configPath = filePath return manager } catch (error) { logger.error('Unexpected error loading exclusion configuration', { filePath, error: error instanceof Error ? error.message : 'Unknown error', stack: error instanceof Error ? error.stack : undefined }) // Return default configuration on any unexpected error logger.info('Falling back to default exclusion configuration') return new ExclusionConfigManager() } } /** * Get the current configuration */ getConfig(): ExclusionConfig { return this.config } /** * Get all exclusion patterns as a flat array (for backward compatibility) */ getAllExclusionPatterns(): string[] { const patterns: string[] = [] // Add folder patterns patterns.push(...this.config.exclusions.folders.patterns) // Add file patterns patterns.push(...this.config.exclusions.files.patterns) // Add extension patterns (convert to glob patterns) patterns.push(...this.config.exclusions.extensions.patterns.map(ext => `*.${ext}`)) // Add exact name patterns patterns.push(...this.config.exclusions.exact_names.patterns) // Add language-specific patterns if available if (this.config.language_specific) { for (const langConfig of Object.values(this.config.language_specific)) { patterns.push(...langConfig.folders) patterns.push(...langConfig.files) } } return [...new Set(patterns)] // Remove duplicates } /** * Check if a file should be excluded based on size */ shouldExcludeBySize(fileSizeBytes: number): boolean { const sizeLimits = this.config.exclusions.size_limits if (!sizeLimits) return false return fileSizeBytes > sizeLimits.max_file_size_bytes } /** * Get inclusion override patterns */ getInclusionOverrides(): string[] { return this.config.inclusion_overrides?.patterns || [] } /** * Get custom rules */ getCustomRules(): CustomRules { return this.config.custom_rules || { exclude_test_files: false, exclude_documentation: false, exclude_config_files: false, exclude_sample_data: true, } } /** * Get content-based rules */ getContentBasedRules(): ContentBasedRules { return this.config.exclusions.content_based || { binary_files: true, empty_files: false, minified_files: true, } } /** * Validate exclusion patterns */ static validatePatterns(config: ExclusionConfig): { isValid: boolean validPatterns: number invalidPatterns: string[] } { const invalidPatterns: string[] = [] let validPatterns = 0 // Helper function to validate a single pattern const validatePattern = (pattern: string, type: string): boolean => { try { // Basic validation - check for obviously invalid patterns if (!pattern || pattern.trim().length === 0) { invalidPatterns.push(`${type}: Empty pattern`) return false } // Check for invalid regex characters that might cause issues // Allow glob wildcards (* and ?) as they are expected in patterns const problematicChars = /[<>"|\x00-\x1f]/ if (problematicChars.test(pattern)) { invalidPatterns.push(`${type}: "${pattern}" contains invalid characters`) return false } // Try to create a regex from the pattern to test validity const regexPattern = pattern .replace(/\*\*/g, '.*') .replace(/\*/g, '[^/]*') .replace(/\?/g, '.') .replace(/\./g, '\\.') new RegExp(regexPattern) validPatterns++ return true } catch (error) { invalidPatterns.push(`${type}: "${pattern}" - ${error instanceof Error ? error.message : 'Invalid pattern'}`) return false } } // Validate all pattern types config.exclusions.folders.patterns.forEach(pattern => validatePattern(pattern, 'folder')) config.exclusions.files.patterns.forEach(pattern => validatePattern(pattern, 'file')) config.exclusions.extensions.patterns.forEach(pattern => validatePattern(pattern, 'extension')) config.exclusions.exact_names.patterns.forEach(pattern => validatePattern(pattern, 'exact_name')) // Validate language-specific patterns if (config.language_specific) { Object.entries(config.language_specific).forEach(([lang, langConfig]) => { langConfig.folders.forEach(pattern => validatePattern(pattern, `${lang}_folder`)) langConfig.files.forEach(pattern => validatePattern(pattern, `${lang}_file`)) }) } // Validate inclusion overrides if (config.inclusion_overrides) { config.inclusion_overrides.patterns.forEach(pattern => validatePattern(pattern, 'inclusion_override')) } return { isValid: invalidPatterns.length === 0, validPatterns, invalidPatterns } } /** * Reload configuration from file */ async reload(): Promise<void> { if (!this.configPath) { logger.warn('No config path set, cannot reload') return } const newManager = await ExclusionConfigManager.loadFromFile(this.configPath) this.config = newManager.config logger.info('Exclusion configuration reloaded') } }