UNPKG

cntx-ui

Version:

File context management tool with web UI and MCP server for AI development workflows - bundle project files for LLM consumption

588 lines (492 loc) 17.7 kB
/** * True Semantic Splitting - Function-level code chunks with context * Creates surgical, self-contained chunks for AI consumption * Operates parallel to file-level bundle system */ import { readFileSync, existsSync } from 'fs' import { extname, basename, dirname, join } from 'path' import glob from 'glob' import HeuristicsManager from './heuristics-manager.js' export default class SemanticSplitter { constructor(options = {}) { this.options = { maxChunkSize: 2000, // Max chars per chunk includeContext: true, // Include imports/types needed groupRelated: true, // Group related functions minFunctionSize: 50, // Skip tiny functions ...options } // Initialize heuristics manager this.heuristicsManager = new HeuristicsManager() } /** * Extract semantic chunks from project */ async extractSemanticChunks(projectPath, patterns = ['**/*.{js,jsx,ts,tsx,mjs}'], bundleConfig = null) { console.log('🔪 Starting semantic splitting...') const files = this.findFiles(projectPath, patterns) console.log(`📁 Found ${files.length} files to split`) // Load bundle configuration if provided this.bundleConfig = bundleConfig const allFunctions = [] const allTypes = [] const allImports = [] // Extract all code elements for (const filePath of files) { try { const elements = this.extractCodeElements(filePath, projectPath) allFunctions.push(...elements.functions) allTypes.push(...elements.types) allImports.push(...elements.imports) } catch (error) { console.warn(`Failed to extract from ${filePath}: ${error.message}`) } } console.log(`⚡ Extracted ${allFunctions.length} functions, ${allTypes.length} types`) // Create semantic chunks const chunks = this.createSemanticChunks(allFunctions, allTypes, allImports) console.log(`🧩 Created ${chunks.length} semantic chunks`) return { summary: { totalFiles: files.length, totalFunctions: allFunctions.length, totalChunks: chunks.length, averageChunkSize: chunks.reduce((sum, c) => sum + c.code.length, 0) / chunks.length }, chunks: chunks } } /** * Find files to analyze (same logic as bundles) */ findFiles(projectPath, patterns) { const files = [] for (const pattern of patterns) { const matches = glob.sync(pattern, { cwd: projectPath, ignore: [ 'node_modules/**', 'dist/**', 'build/**', '.git/**', '*.test.*', '*.spec.*', '**/test/**', '**/tests/**', '**/*.min.js', '**/*.bundle.js' ] }) files.push(...matches.filter(file => !file.includes('node_modules') && !file.includes('dist/') && !file.includes('.min.') )) } return [...new Set(files)] } /** * Extract functions, types, and imports from a file */ extractCodeElements(relativePath, projectPath) { const fullPath = join(projectPath, relativePath) if (!existsSync(fullPath)) return { functions: [], types: [], imports: [] } const content = readFileSync(fullPath, 'utf8') const lines = content.split('\n') return { functions: this.extractFunctions(content, lines, relativePath), types: this.extractTypes(content, lines, relativePath), imports: this.extractImports(content, relativePath) } } /** * Extract functions with robust regex patterns */ extractFunctions(content, lines, filePath) { const functions = [] // Pattern 1: Regular function declarations const functionRegex = /^(\s*)(?:export\s+)?(?:async\s+)?function\s+([a-zA-Z_$][a-zA-Z0-9_$]*)\s*\([^)]*\)\s*\{/gm // Pattern 2: Arrow functions assigned to const/let const arrowRegex = /^(\s*)(?:export\s+)?const\s+([a-zA-Z_$][a-zA-Z0-9_$]*)\s*=\s*(?:async\s+)?\([^)]*\)\s*=>\s*[\{]/gm // Pattern 3: Class methods const methodRegex = /^(\s+)(?:async\s+)?([a-zA-Z_$][a-zA-Z0-9_$]*)\s*\([^)]*\)\s*\{/gm // Pattern 4: React components (function components) const componentRegex = /^(\s*)(?:export\s+(?:default\s+)?)?function\s+([A-Z][a-zA-Z0-9_$]*)\s*\([^)]*\)\s*\{/gm const patterns = [ { regex: functionRegex, type: 'function' }, { regex: arrowRegex, type: 'arrow_function' }, { regex: methodRegex, type: 'method' }, { regex: componentRegex, type: 'react_component' } ] for (const { regex, type } of patterns) { let match while ((match = regex.exec(content)) !== null) { const functionName = match[2] const indentation = match[1] const startIndex = match.index // Skip if it's a keyword or common false positive if (['if', 'for', 'while', 'switch', 'catch'].includes(functionName)) { continue } const startLine = content.substring(0, startIndex).split('\n').length const functionBody = this.extractFunctionBody(content, startIndex) if (functionBody && functionBody.length > this.options.minFunctionSize) { functions.push({ name: functionName, type: type, filePath: filePath, startLine: startLine, code: functionBody, indentation: indentation.length, isExported: match[0].includes('export'), isAsync: match[0].includes('async'), size: functionBody.length }) } } } return functions } /** * Extract function body using brace matching */ extractFunctionBody(content, startIndex) { const openBraceIndex = content.indexOf('{', startIndex) if (openBraceIndex === -1) return null let braceCount = 0 let currentIndex = openBraceIndex let inString = false let stringChar = null while (currentIndex < content.length) { const char = content[currentIndex] const prevChar = content[currentIndex - 1] || '' // Handle string literals if ((char === '"' || char === "'" || char === '`') && prevChar !== '\\') { if (!inString) { inString = true stringChar = char } else if (char === stringChar) { inString = false stringChar = null } } // Count braces outside strings if (!inString) { if (char === '{') braceCount++ else if (char === '}') braceCount-- if (braceCount === 0) { // Found the closing brace return content.substring(startIndex, currentIndex + 1).trim() } } currentIndex++ } return null // Unmatched braces } /** * Extract type definitions and interfaces */ extractTypes(content, lines, filePath) { const types = [] // TypeScript interfaces const interfaceRegex = /^(\s*)(?:export\s+)?interface\s+([A-Z][a-zA-Z0-9_$]*)\s*\{/gm // Type aliases const typeRegex = /^(\s*)(?:export\s+)?type\s+([A-Z][a-zA-Z0-9_$]*)\s*=/gm const patterns = [ { regex: interfaceRegex, type: 'interface' }, { regex: typeRegex, type: 'type_alias' } ] for (const { regex, type } of patterns) { let match while ((match = regex.exec(content)) !== null) { const typeName = match[2] const startIndex = match.index const startLine = content.substring(0, startIndex).split('\n').length let typeBody if (type === 'interface') { typeBody = this.extractTypeBody(content, startIndex) } else { // For type aliases, extract until semicolon or newline const endIndex = content.indexOf(';', startIndex) typeBody = content.substring(startIndex, endIndex + 1).trim() } if (typeBody) { types.push({ name: typeName, type: type, filePath: filePath, startLine: startLine, code: typeBody, isExported: match[0].includes('export') }) } } } return types } /** * Extract type body (similar to function body) */ extractTypeBody(content, startIndex) { const openBraceIndex = content.indexOf('{', startIndex) if (openBraceIndex === -1) return null let braceCount = 0 let currentIndex = openBraceIndex while (currentIndex < content.length) { const char = content[currentIndex] if (char === '{') braceCount++ else if (char === '}') braceCount-- if (braceCount === 0) { return content.substring(startIndex, currentIndex + 1).trim() } currentIndex++ } return null } /** * Extract import statements */ extractImports(content, filePath) { const imports = [] const importRegex = /^(\s*)import\s+(.+?)\s+from\s+['"`]([^'"`]+)['"`]/gm let match while ((match = importRegex.exec(content)) !== null) { const importStatement = match[0].trim() const importPath = match[3] imports.push({ statement: importStatement, path: importPath, filePath: filePath, isRelative: importPath.startsWith('.'), isExternal: !importPath.startsWith('.') }) } return imports } /** * Create semantic chunks from extracted elements */ createSemanticChunks(functions, types, imports) { const chunks = [] // Create function-level chunks for (const func of functions) { const chunk = this.createFunctionChunk(func, types, imports) if (chunk) { chunks.push(chunk) } } // Create type-only chunks for standalone types for (const type of types) { if (!this.isTypeUsedInFunctions(type, functions)) { chunks.push(this.createTypeChunk(type, imports)) } } return chunks } /** * Create a semantic chunk for a function with its context */ createFunctionChunk(func, allTypes, allImports) { let chunkCode = '' const includedImports = new Set() const includedTypes = new Set() // Find relevant imports for this function const fileImports = allImports.filter(imp => imp.filePath === func.filePath) // Find types referenced in the function const referencedTypes = this.findReferencedTypes(func.code, allTypes) // Add necessary imports for (const imp of fileImports) { if (this.isImportRelevant(imp, func.code)) { chunkCode += imp.statement + '\n' includedImports.add(imp.path) } } // Add referenced types for (const type of referencedTypes) { chunkCode += '\n' + type.code + '\n' includedTypes.add(type.name) } // Add the function itself chunkCode += '\n' + func.code // Create chunk with adaptive sizing - never lose functions let finalCode = chunkCode.trim() let contextLevel = 'full' // If too large, try with reduced context if (chunkCode.length > this.options.maxChunkSize) { // Fallback 1: Function + essential imports only (no types) finalCode = '' for (const imp of fileImports.slice(0, 3)) { // Limit to 3 imports if (this.isImportRelevant(imp, func.code)) { finalCode += imp.statement + '\n' } } finalCode += '\n' + func.code contextLevel = 'reduced' } // If still too large, function only if (finalCode.length > this.options.maxChunkSize) { finalCode = func.code contextLevel = 'minimal' } // Always create a chunk - never lose functions return { name: func.name, type: 'function_chunk', subtype: func.type, code: finalCode, size: finalCode.length, filePath: func.filePath, startLine: func.startLine, isExported: func.isExported, isAsync: func.isAsync, complexity: this.calculateComplexity(func.code), includes: { imports: contextLevel === 'minimal' ? [] : Array.from(includedImports), types: contextLevel === 'full' ? Array.from(includedTypes) : [] }, purpose: this.determinePurpose(func), tags: [...this.generateTags(func), contextLevel === 'full' ? 'full-context' : contextLevel === 'reduced' ? 'reduced-context' : 'minimal-context'], bundles: this.getFileBundles(func.filePath) } } /** * Create a chunk for standalone types */ createTypeChunk(type, allImports) { let chunkCode = '' const includedImports = new Set() // Add relevant imports if any const fileImports = allImports.filter(imp => imp.filePath === type.filePath) for (const imp of fileImports.slice(0, 3)) { // Limit imports chunkCode += imp.statement + '\n' } chunkCode += '\n' + type.code return { name: type.name, type: 'type_chunk', subtype: type.type, code: chunkCode.trim(), size: chunkCode.length, filePath: type.filePath, startLine: type.startLine, isExported: type.isExported, purpose: 'Type definition', tags: ['type', type.type], bundles: this.getFileBundles(type.filePath) } } /** * Find types referenced in function code */ findReferencedTypes(functionCode, allTypes) { const referenced = [] for (const type of allTypes) { // Check if type name appears in function code const typeRegex = new RegExp(`\\b${type.name}\\b`, 'g') if (typeRegex.test(functionCode)) { referenced.push(type) } } return referenced } /** * Check if import is relevant to function */ isImportRelevant(importStatement, functionCode) { // Simple heuristic: check if any imported identifiers appear in function const importMatch = importStatement.statement.match(/import\s+(.+?)\s+from/) if (!importMatch) return false const imported = importMatch[1] // Handle different import styles if (imported.includes('{')) { // Named imports: import { foo, bar } from 'module' const namedImports = imported.match(/\{([^}]+)\}/)?.[1] if (namedImports) { const names = namedImports.split(',').map(name => name.trim()) return names.some(name => functionCode.includes(name)) } } else { // Default import: import foo from 'module' const defaultImport = imported.trim() return functionCode.includes(defaultImport) } return false } /** * Check if type is used in any function */ isTypeUsedInFunctions(type, functions) { const typeRegex = new RegExp(`\\b${type.name}\\b`, 'g') return functions.some(func => typeRegex.test(func.code)) } /** * Calculate function complexity (cyclomatic complexity) */ calculateComplexity(code) { let complexity = 1 // Base complexity // Simple complexity indicators - just count control flow structures const indicators = { 'if': (code.match(/\bif\s*\(/g) || []).length, 'else if': (code.match(/\belse\s+if\b/g) || []).length, 'for': (code.match(/\bfor\s*\(/g) || []).length, 'while': (code.match(/\bwhile\s*\(/g) || []).length, 'switch': (code.match(/\bswitch\s*\(/g) || []).length, 'case': (code.match(/\bcase\s+/g) || []).length, 'catch': (code.match(/\bcatch\s*\(/g) || []).length, 'ternary': (code.match(/\?\s*[^?\.\s]/g) || []).length, 'logical_and': (code.match(/&&\s*[^&=]/g) || []).length, 'logical_or': (code.match(/\|\|\s*[^|=]/g) || []).length } // Sum all complexity indicators for (const count of Object.values(indicators)) { complexity += count } // Return complexity with reasonable thresholds return { score: complexity, level: complexity <= 3 ? 'low' : complexity <= 8 ? 'medium' : 'high' } } /** * Determine function purpose using heuristics configuration */ determinePurpose(func) { return this.heuristicsManager.determinePurpose(func) } /** * Generate tags for function */ generateTags(func) { const tags = [func.type] if (func.isExported) tags.push('exported') if (func.isAsync) tags.push('async') if (func.size > 1000) tags.push('large') if (func.code.includes('console.log')) tags.push('has-logging') if (func.code.includes('throw')) tags.push('can-throw') if (func.code.includes('return')) tags.push('returns-value') return tags } /** * Determine which bundles a file belongs to */ getFileBundles(filePath) { if (!this.bundleConfig?.bundles) return [] const bundles = [] for (const [bundleName, patterns] of Object.entries(this.bundleConfig.bundles)) { // Skip master bundle as requested if (bundleName === 'master') continue // Check if file matches any pattern in this bundle for (const pattern of patterns) { if (this.matchesPattern(filePath, pattern)) { bundles.push(bundleName) break // Don't add the same bundle multiple times } } } return bundles } /** * Simple pattern matching (basic glob support) */ matchesPattern(filePath, pattern) { // Convert glob pattern to regex const regex = pattern .replace(/\*\*/g, '.*') // ** matches any directories .replace(/\*/g, '[^/]*') // * matches any characters except / .replace(/\./g, '\\.') // Escape dots return new RegExp(`^${regex}$`).test(filePath) } }