UNPKG

cntx-ui

Version:

File context management tool with web UI and MCP server for AI development workflows - bundle project files for LLM consumption

406 lines (343 loc) 11.6 kB
/**
 * Function-Level Semantic Chunker
 * Extracts individual functions/methods/components as discrete chunks
 * with intelligent context inclusion
 */

import { readFileSync, existsSync } from 'fs'
import { extname, basename, dirname, join } from 'path'
import glob from 'glob'

// Paths that are never analyzed: dependencies, build output, VCS data, tests
// and pre-built bundles. Test globs are anchored with `**/` so they apply at
// any depth, not only in the project root.
const IGNORED_GLOBS = [
  'node_modules/**',
  'dist/**',
  'build/**',
  '.git/**',
  '**/*.test.*',
  '**/*.spec.*',
  '**/test/**',
  '**/tests/**',
  '**/*.min.js',
  '**/*.bundle.js'
]

// Words the loose "method" pattern also matches (`if (x) {`, `for (...) {`, ...).
// They are statements, not method names, and must be rejected.
const NON_METHOD_KEYWORDS = new Set([
  'if', 'for', 'while', 'switch', 'catch', 'with', 'function', 'return'
])

// Names that look like calls (`name(`) but are not interesting call targets.
const NON_CALL_NAMES = new Set([
  'if', 'for', 'while', 'switch', 'catch', 'console', 'Object', 'Array'
])

// Complexity indicators that are whole words and can be matched with \b...\b.
const COMPLEXITY_KEYWORDS = [
  'if', 'else', 'for', 'while', 'switch', 'case', 'try', 'catch', 'async', 'await'
]

// Complexity indicators made of punctuation. These must be counted literally:
// wrapping them in \b...\b yields an invalid regex for `?` and a pattern that
// matches the empty string everywhere for `||`.
const COMPLEXITY_OPERATORS = ['&&', '||', '?', ':']

const CLOSING_BRACKETS = { '{': '}', '(': ')' }

/**
 * Find the bracket that closes the `{` or `(` located at `openIndex`.
 *
 * String/template literals, escaped characters (including `\{` or `\(` inside
 * regex literals) and comments are skipped so that brackets inside them are
 * not counted. This is a heuristic scanner, not a parser: unescaped brackets
 * or quotes inside regex character classes and nested template literals can
 * still mislead it.
 *
 * @param {string} content - Full source text.
 * @param {number} openIndex - Index of the opening `{` or `(`.
 * @returns {number} Index of the matching closer, or -1 when there is none.
 */
function findMatchingClose(content, openIndex) {
  const open = content[openIndex]
  const close = CLOSING_BRACKETS[open]
  if (close === undefined) return -1

  let depth = 1
  let quote = null

  for (let i = openIndex + 1; i < content.length; i++) {
    const char = content[i]

    if (quote !== null) {
      if (char === '\\') {
        i++ // skip the escaped character, so '\\' and '\'' terminate correctly
      } else if (char === quote) {
        quote = null
      }
      continue
    }

    if (char === '\\') {
      i++ // escaped character outside a string, i.e. inside a regex literal
      continue
    }

    if (char === '"' || char === "'" || char === '`') {
      quote = char
      continue
    }

    if (char === '/' && content[i + 1] === '/') {
      const lineEnd = content.indexOf('\n', i)
      if (lineEnd === -1) return -1
      i = lineEnd
      continue
    }

    if (char === '/' && content[i + 1] === '*') {
      const commentEnd = content.indexOf('*/', i + 2)
      if (commentEnd === -1) return -1
      i = commentEnd + 1
      continue
    }

    if (char === open) {
      depth++
    } else if (char === close) {
      depth--
      if (depth === 0) return i
    }
  }

  return -1
}

/**
 * Find where the body of an arrow function ends.
 *
 * @param {string} content - Full source text.
 * @param {number} signatureEnd - Index just past the `=>` token.
 * @returns {number} Exclusive end index of the body, or -1 if it cannot be found.
 */
function findArrowBodyEnd(content, signatureEnd) {
  let bodyStart = signatureEnd
  while (bodyStart < content.length && /\s/.test(content[bodyStart])) bodyStart++
  if (bodyStart >= content.length) return -1

  const first = content[bodyStart]
  if (first === '{' || first === '(') {
    // Block body, or a parenthesized expression (typical for JSX returns).
    const closeIndex = findMatchingClose(content, bodyStart)
    return closeIndex === -1 ? -1 : closeIndex + 1
  }

  // Concise expression body: take the rest of the line the expression starts on.
  const lineEnd = content.indexOf('\n', bodyStart)
  return lineEnd === -1 ? content.length : lineEnd
}

/**
 * Build the `{ code, endLine }` record for the source span [start, end).
 *
 * @param {string} content - Full source text.
 * @param {number} start - Inclusive start index of the function.
 * @param {number} end - Exclusive end index of the function.
 * @param {number} startLine - 1-based line number of `start`.
 * @returns {{ code: string, endLine: number }}
 */
function sliceFunction(content, start, end, startLine) {
  const raw = content.substring(start, end)
  return {
    code: raw.trim(),
    endLine: startLine + raw.split('\n').length - 1
  }
}

export default class FunctionLevelChunker {
  /**
   * @param {object} [options] - Overrides for the default options
   *   (`includeContext`, `maxContextLines`, `groupRelated`).
   */
  constructor(options = {}) {
    this.options = {
      includeContext: true,
      maxContextLines: 50,
      groupRelated: true,
      ...options
    }
  }

  /**
   * Extract function-level chunks from project
   *
   * Files that fail to parse are reported with `console.warn` and skipped.
   *
   * @param {string} projectPath - Root directory of the project.
   * @param {string[]} [patterns] - Glob patterns (relative to `projectPath`).
   * @returns {Promise<{summary: {totalFiles: number, totalFunctions: number, totalChunks: number}, functions: object[], chunks: object[]}>}
   */
  async extractFunctionChunks(projectPath, patterns = ['**/*.{js,jsx,ts,tsx,mjs}']) {
    console.log('🔍 Starting function-level extraction...')

    const files = this.findFiles(projectPath, patterns)
    console.log(`📁 Found ${files.length} files to analyze`)

    const allFunctions = []
    let processedFiles = 0

    for (const filePath of files) {
      try {
        const functions = this.extractFunctionsFromFile(filePath, projectPath)
        allFunctions.push(...functions)
        processedFiles++
      } catch (error) {
        console.warn(`Failed to extract from ${filePath}: ${error.message}`)
      }
    }

    console.log(`✅ Extracted ${allFunctions.length} functions from ${processedFiles} files`)

    // Create semantic chunks from functions
    const chunks = this.createFunctionChunks(allFunctions)
    console.log(`📦 Created ${chunks.length} function-level chunks`)

    return {
      summary: {
        totalFiles: processedFiles,
        totalFunctions: allFunctions.length,
        totalChunks: chunks.length
      },
      functions: allFunctions,
      chunks: chunks
    }
  }

  /**
   * Find files to analyze
   *
   * @param {string} projectPath - Directory the patterns are resolved against.
   * @param {string[]} patterns - Glob patterns to expand.
   * @returns {string[]} De-duplicated paths relative to `projectPath`.
   */
  findFiles(projectPath, patterns) {
    const files = new Set()

    for (const pattern of patterns) {
      const matches = glob.sync(pattern, { cwd: projectPath, ignore: IGNORED_GLOBS })

      for (const file of matches) {
        // Second line of defence in case the glob ignore list lets a nested
        // dependency/build path through.
        const isExcluded =
          file.includes('node_modules') || file.includes('dist/') || file.includes('.min.')
        if (!isExcluded) files.add(file)
      }
    }

    return [...files]
  }

  /**
   * Extract all functions from a single file
   *
   * Tree-sitter extraction is tried first; when it fails or finds nothing
   * (which is always the case while it is a stub) the regex extractor is used.
   *
   * @param {string} relativePath - File path relative to `projectPath`.
   * @param {string} projectPath - Project root directory.
   * @returns {object[]} Extracted function records; empty if the file is missing.
   */
  extractFunctionsFromFile(relativePath, projectPath) {
    const fullPath = join(projectPath, relativePath)
    if (!existsSync(fullPath)) return []

    const content = readFileSync(fullPath, 'utf8')

    let functions = []
    try {
      const treeSitterFunctions = this.extractWithTreeSitter(content, relativePath)
      if (Array.isArray(treeSitterFunctions)) functions = treeSitterFunctions
    } catch (error) {
      // Tree-sitter is optional: any failure simply means the regex
      // extractor below does the work.
      functions = []
    }

    if (functions.length === 0) {
      functions = this.extractWithRegex(content, content.split('\n'), relativePath)
    }

    return functions
  }

  /**
   * Extract functions using regex patterns (robust fallback)
   *
   * Each record has `name`, `type`, `filePath`, `startLine`, `endLine`,
   * `code`, `context` and `signature`. Records are returned in source order
   * and a declaration is reported once even if several patterns match it.
   *
   * @param {string} content - Full source text of the file.
   * @param {string[]} lines - `content` split on newlines (passed through to
   *   `extractFunctionBody`).
   * @param {string} filePath - Path stored on each record.
   * @returns {object[]} Function records.
   */
  extractWithRegex(content, lines, filePath) {
    // Patterns are anchored with `^[ \t]*` (not `\s*`) so a match always
    // starts on the declaration's own line and the computed line number is
    // right. Order matters: the first pattern to claim a line wins.
    const patterns = [
      // React components: export function ComponentName() {}
      {
        pattern: /^[ \t]*export\s+(?:default\s+)?function\s+([A-Z][a-zA-Z0-9_$]*)\s*\(/gm,
        type: 'react_component'
      },
      // Function declarations: function name() {}
      {
        pattern: /^[ \t]*(?:export\s+(?:default\s+)?)?(?:async\s+)?function\s+([a-zA-Z_$][a-zA-Z0-9_$]*)\s*\(/gm,
        type: 'function'
      },
      // Arrow functions: const name = () => {}
      {
        pattern: /^[ \t]*(?:export\s+)?const\s+([a-zA-Z_$][a-zA-Z0-9_$]*)\s*=\s*(?:async\s+)?\([^)]*\)\s*=>/gm,
        type: 'arrow_function'
      },
      // Class methods: methodName() {}
      {
        pattern: /^[ \t]*(?:async\s+)?([a-zA-Z_$][a-zA-Z0-9_$]*)\s*\([^)]*\)\s*\{/gm,
        type: 'method'
      }
    ]

    const functions = []
    const seenStarts = new Set()

    for (const { pattern, type } of patterns) {
      let match
      while ((match = pattern.exec(content)) !== null) {
        const functionName = match[1]
        const startIndex = match.index

        // Already claimed by a more specific pattern.
        if (seenStarts.has(startIndex)) continue
        // `if (...) {`, `for (...) {` etc. are statements, not methods.
        if (type === 'method' && NON_METHOD_KEYWORDS.has(functionName)) continue
        seenStarts.add(startIndex)

        // Find the line number
        const lineNumber = content.substring(0, startIndex).split('\n').length
        const signatureEnd = startIndex + match[0].length

        // Extract function body
        let functionInfo
        if (type === 'arrow_function') {
          const bodyEnd = findArrowBodyEnd(content, signatureEnd)
          functionInfo =
            bodyEnd === -1 ? null : sliceFunction(content, startIndex, bodyEnd, lineNumber)
        } else {
          // The method pattern ends on the body's `{`; the function patterns
          // end on the `(` of the parameter list, which is skipped so that
          // braces in default values or destructuring are not taken for the body.
          let searchFrom = signatureEnd - 1
          if (type !== 'method') {
            const closeParen = findMatchingClose(content, signatureEnd - 1)
            searchFrom = closeParen === -1 ? startIndex : closeParen
          }
          functionInfo = this.extractFunctionBody(
            content, startIndex, lines, lineNumber, searchFrom
          )
        }

        // Only include substantial functions
        if (functionInfo && functionInfo.code.length > 10) {
          functions.push({
            name: functionName,
            type: type,
            filePath: filePath,
            startLine: lineNumber,
            endLine: functionInfo.endLine,
            code: functionInfo.code,
            context: this.extractContext(content, functionInfo, filePath),
            signature: match[0].trim()
          })
        }
      }
    }

    // Report functions in the order they appear in the file.
    functions.sort((a, b) => a.startLine - b.startLine)
    return functions
  }

  /**
   * Extract function body by finding matching braces
   *
   * @param {string} content - Full source text.
   * @param {number} startIndex - Index where the function's code starts.
   * @param {string[]} lines - Unused; kept so existing callers keep working.
   * @param {number} startLine - 1-based line number of `startIndex`.
   * @param {number} [searchFrom=startIndex] - Index from which to look for the
   *   body's opening brace (lets callers skip the parameter list).
   * @returns {{ code: string, endLine: number } | null} `null` when the braces
   *   are unbalanced.
   */
  extractFunctionBody(content, startIndex, lines, startLine, searchFrom = startIndex) {
    // Find opening brace
    const braceIndex = content.indexOf('{', searchFrom)

    if (braceIndex === -1) {
      // No block body anywhere after the start: treat the declaration as a
      // single line (e.g. const fn = () => expression).
      const lineEnd = content.indexOf('\n', startIndex)
      const end = lineEnd === -1 ? content.length : lineEnd
      return sliceFunction(content, startIndex, end, startLine)
    }

    const closeIndex = findMatchingClose(content, braceIndex)
    if (closeIndex === -1) return null // Unmatched braces

    return sliceFunction(content, startIndex, closeIndex + 1, startLine)
  }

  /**
   * Extract relevant context for a function
   *
   * @param {string} content - Full source text of the containing file.
   * @param {{ code: string }} functionInfo - Extracted function body.
   * @param {string} filePath - Unused; kept for interface compatibility.
   * @returns {{imports: string[], types: string[], dependencies: Array, calledFunctions: string[]}}
   */
  extractContext(content, functionInfo, filePath) {
    return {
      imports: this.extractImports(content),
      types: this.extractTypes(content),
      dependencies: [],
      calledFunctions: this.extractCalledFunctions(functionInfo.code)
    }
  }

  /**
   * Extract import statements
   *
   * @param {string} content - Source text to scan.
   * @returns {string[]} Module specifiers of `import ... from '...'` statements.
   */
  extractImports(content) {
    const imports = []
    const importPattern = /import\s+(?:{[^}]+}|\*\s+as\s+\w+|\w+)\s+from\s+['"`]([^'"`]+)['"`]/g

    let match
    while ((match = importPattern.exec(content)) !== null) {
      imports.push(match[1])
    }

    return imports
  }

  /**
   * Extract type definitions
   *
   * @param {string} content - Source text to scan.
   * @returns {string[]} Capitalized names following `type` or `interface`.
   */
  extractTypes(content) {
    const types = []
    const typePattern = /(?:type|interface)\s+([A-Z][a-zA-Z0-9]*)/g

    let match
    while ((match = typePattern.exec(content)) !== null) {
      types.push(match[1])
    }

    return types
  }

  /**
   * Extract function calls within code
   *
   * Any identifier directly followed by `(` counts, so the function's own
   * name (from its signature) is included in the result.
   *
   * @param {string} code - Source of a single function.
   * @returns {string[]} Unique names in order of first appearance.
   */
  extractCalledFunctions(code) {
    const calls = new Set()
    const callPattern = /([a-zA-Z_$][a-zA-Z0-9_$]*)\s*\(/g

    let match
    while ((match = callPattern.exec(code)) !== null) {
      const funcName = match[1]
      // Filter out common keywords and built-ins
      if (!NON_CALL_NAMES.has(funcName)) calls.add(funcName)
    }

    return [...calls]
  }

  /**
   * Create semantic chunks from extracted functions
   *
   * @param {object[]} functions - Records produced by the extractors.
   * @returns {object[]} One chunk per function, grouped by file.
   */
  createFunctionChunks(functions) {
    const chunks = []

    // Group functions by file so chunks of the same file stay together
    const fileGroups = this.groupFunctionsByFile(functions)

    for (const fileFunctions of Object.values(fileGroups)) {
      // Create individual function chunks
      for (const func of fileFunctions) {
        chunks.push({
          name: `${func.name}`,
          type: 'function',
          subtype: func.type,
          filePath: func.filePath,
          functions: [func],
          size: func.code.length,
          complexity: this.calculateComplexity(func.code),
          context: func.context,
          purpose: this.determinePurpose(func),
          tags: this.generateTags(func)
        })
      }
    }

    return chunks
  }

  /**
   * Group functions by file
   *
   * @param {object[]} functions - Function records with a `filePath`.
   * @returns {Object<string, object[]>} Records keyed by file path.
   */
  groupFunctionsByFile(functions) {
    const groups = {}

    for (const func of functions) {
      if (!groups[func.filePath]) {
        groups[func.filePath] = []
      }
      groups[func.filePath].push(func)
    }

    return groups
  }

  /**
   * Calculate function complexity
   *
   * The score is 1 plus the number of branching keywords (matched as whole
   * words) and branching operators (matched literally, so `??` counts as two
   * `?` and every `:` counts, including those in object literals).
   *
   * @param {string} code - Source of a single function.
   * @returns {{ score: number, level: 'low' | 'medium' | 'high' }}
   */
  calculateComplexity(code) {
    let complexity = 1 // Base complexity

    for (const keyword of COMPLEXITY_KEYWORDS) {
      const matches = code.match(new RegExp(`\\b${keyword}\\b`, 'g')) || []
      complexity += matches.length
    }

    for (const operator of COMPLEXITY_OPERATORS) {
      // split() yields one more piece than there are occurrences.
      complexity += code.split(operator).length - 1
    }

    return {
      score: complexity,
      level: complexity < 5 ? 'low' : complexity < 15 ? 'medium' : 'high'
    }
  }

  /**
   * Determine function purpose
   *
   * The first matching rule wins; rules are name-based except the last,
   * which looks for `express`/`router` in the code.
   *
   * @param {{ name: string, type: string, code: string }} func - Function record.
   * @returns {string} Human-readable purpose label.
   */
  determinePurpose(func) {
    const name = func.name.toLowerCase()
    const code = func.code.toLowerCase()

    if (func.type === 'react_component') return 'React component'

    // Hooks are `use` followed by an upper-case letter or digit; testing the
    // original casing keeps names such as `userList` from being misclassified.
    const isFunctionLike = func.type === 'function' || func.type === 'arrow_function'
    if (isFunctionLike && /^use[A-Z0-9]/.test(func.name)) return 'React hook'

    if (name.includes('test') || name.includes('spec')) return 'Test function'
    if (name.includes('get') || name.includes('fetch')) return 'Data retrieval'
    if (name.includes('create') || name.includes('add')) return 'Data creation'
    if (name.includes('update') || name.includes('edit')) return 'Data modification'
    if (name.includes('delete') || name.includes('remove')) return 'Data deletion'
    if (name.includes('validate') || name.includes('check')) return 'Validation'
    if (code.includes('express') || code.includes('router')) return 'API endpoint'

    return 'Utility function'
  }

  /**
   * Generate tags for function
   *
   * @param {{ name: string, type: string, code: string, context: object }} func
   * @returns {string[]} The function type followed by any applicable tags.
   */
  generateTags(func) {
    const tags = [func.type]

    if (func.context.imports.length > 0) tags.push('has-imports')
    if (func.context.calledFunctions.length > 3) tags.push('complex-logic')
    if (func.code.includes('async')) tags.push('async')
    if (func.code.includes('export')) tags.push('exported')
    if (func.name.match(/^[A-Z]/)) tags.push('component-style')

    return tags
  }

  /**
   * Fallback tree-sitter extraction (if available)
   *
   * @param {string} content - Source text of the file.
   * @param {string} filePath - Path of the file.
   * @returns {object[]} Always empty for now, which makes
   *   `extractFunctionsFromFile` use the regex extractor.
   */
  extractWithTreeSitter(content, filePath) {
    // TODO: Implement tree-sitter extraction for functions
    return []
  }
}