UNPKG

@wildcard-ai/deepcodex

Version:

Advanced codebase indexing and semantic search MCP server

409 lines 16.3 kB
/**
 * TreeSitterSymbolExtractor - FULL Tree-sitter AST Implementation
 *
 * This is the proper Tree-sitter implementation with real AST parsing,
 * replacing the regex-based fallback approach with true structural understanding.
 *
 * Features:
 * - Real Tree-sitter parsers for TypeScript/JavaScript
 * - AST-based scope detection and symbol extraction
 * - Semantic understanding of code structure
 * - No regex patterns or manual brace counting
 */
import { Logger } from '../../utils/Logger.js';

// Tree-sitter modules; populated on first use by loadTreeSitter().
let Parser;
let TypeScriptLanguage;
let JavaScriptLanguage;

/**
 * Node types accepted as the `name` of a declaration.
 * `property_identifier` / `private_property_identifier` are what the grammar
 * uses for method names, so they must be accepted for methods to be named.
 */
const NAME_NODE_TYPES = new Set([
    'identifier',
    'type_identifier',
    'property_identifier',
    'private_property_identifier',
]);

/**
 * Ancestor node types at which the search for an enclosing `export_statement`
 * stops: an export found beyond one of these belongs to the container
 * (class, function/namespace body, object literal), not to the nested node.
 */
const EXPORT_BOUNDARY_NODE_TYPES = new Set(['class_body', 'statement_block', 'object']);

/**
 * Lazily load the Tree-sitter runtime and the TypeScript/JavaScript grammars.
 *
 * The module-level variables are only assigned once every import has
 * succeeded, so a failed import leaves `Parser` unset and the next call
 * retries instead of continuing with undefined languages.
 *
 * @returns {Promise<void>}
 * @throws whatever the dynamic imports reject with.
 */
async function loadTreeSitter() {
    if (Parser) {
        return;
    }
    const parserModule = await import('tree-sitter');
    const tsModule = await import('tree-sitter-typescript');
    const jsModule = await import('tree-sitter-javascript');
    TypeScriptLanguage = tsModule.default.typescript;
    JavaScriptLanguage = jsModule.default;
    // Assigned last: `Parser` doubles as the "everything is loaded" flag.
    Parser = parserModule.default;
}

export class TreeSitterSymbolExtractorFull {
    parsers = new Map();
    initialized = false;
    logger;

    constructor() {
        this.logger = new Logger('TREESITTER-FULL', 'info');
    }

    /**
     * Initialize Tree-sitter parsers with real implementations.
     * Safe to call repeatedly; does nothing once initialization succeeded.
     *
     * @returns {Promise<void>}
     * @throws rethrows any load/setup error after logging it.
     */
    async initialize() {
        if (this.initialized) {
            return;
        }
        try {
            await loadTreeSitter();

            // Initialize TypeScript parser
            const tsParser = new Parser();
            tsParser.setLanguage(TypeScriptLanguage);
            this.parsers.set('typescript', tsParser);

            // Initialize JavaScript parser
            const jsParser = new Parser();
            jsParser.setLanguage(JavaScriptLanguage);
            this.parsers.set('javascript', jsParser);

            this.initialized = true;
            this.logger.info('✅ Tree-sitter parsers initialized successfully');
        } catch (error) {
            this.logger.error(`❌ Tree-sitter initialization failed: ${error}`);
            throw error;
        }
    }

    /**
     * Extract symbols using real Tree-sitter AST parsing with intelligent chunking.
     *
     * @param {string} content - source text to analyse.
     * @param {string} language - 'typescript' or 'javascript'.
     * @param {string} filePath - used for log and error messages only.
     * @returns {Promise<object>} `{ symbols, imports, exports, docstrings, scopeGraph, parseErrors }`.
     * @throws {Error} `Unsupported language: …` when no parser is registered for `language`;
     *   parsing errors are logged and rethrown.
     */
    async extractSymbols(content, language, filePath) {
        await this.initialize();
        if (!this.parsers.has(language)) {
            throw new Error(`Unsupported language: ${language}`);
        }
        const parser = this.parsers.get(language);
        try {
            // Check if content exceeds Tree-sitter's 32KB limit.
            // Note: `content.length` counts UTF-16 code units, not bytes.
            const TREESITTER_LIMIT = 32768; // 2^15
            if (content.length <= TREESITTER_LIMIT) {
                // Small file - parse directly
                return await this.parseContentDirectly(parser, content, language, filePath);
            }
            // Large file - use chunking
            return await this.parseContentWithChunking(parser, content, language, filePath);
        } catch (error) {
            this.logger.error(`AST parsing failed for ${filePath}: ${error}`);
            throw error;
        }
    }

    /**
     * Parse content directly (for files under 32KB).
     *
     * @returns {Promise<object>} extraction result; `scopeGraph` is always empty and
     *   `docstrings` / `parseErrors` are returned as collected by the traversal.
     */
    async parseContentDirectly(parser, content, language, filePath) {
        const tree = parser.parse(content);
        const symbols = [];
        const imports = [];
        const exports = [];
        const docstrings = [];
        const parseErrors = [];
        await this.traverseASTNode(tree.rootNode, symbols, imports, exports, docstrings, parseErrors, language, []);
        this.logger.info(`Extracted ${symbols.length} symbols from ${filePath}`);
        return {
            symbols,
            imports,
            exports,
            docstrings,
            scopeGraph: { nodes: [], edges: [] },
            parseErrors
        };
    }

    /**
     * Parse large content using simple line-based chunking.
     */
    async parseContentWithChunking(parser, content, language, filePath) {
        // Use simple line-based chunking that's guaranteed to work
        return await this.parseWithLineBoundaries(parser, content, language, filePath);
    }

    /**
     * Fallback: parse with simple line boundaries.
     *
     * The content is cut into fixed-size groups of lines, each group is parsed
     * on its own, and chunk-relative line numbers are shifted back to
     * file-relative ones. A chunk that fails to parse is recorded in
     * `parseErrors` and does not abort the remaining chunks.
     */
    async parseWithLineBoundaries(parser, content, language, filePath) {
        // Simple approach: split into reasonably sized chunks at line boundaries
        const lines = content.split('\n');
        const LINES_PER_CHUNK = 500; // Small enough to be safe
        const allSymbols = [];
        const allImports = [];
        const allExports = [];
        const allDocstrings = [];
        const allParseErrors = [];

        for (let i = 0; i < lines.length; i += LINES_PER_CHUNK) {
            const chunkLines = lines.slice(i, i + LINES_PER_CHUNK);
            const chunkContent = chunkLines.join('\n');
            try {
                const result = await this.parseContentDirectly(parser, chunkContent, language, `${filePath}:lines${i}-${i + chunkLines.length}`);
                // Adjust line numbers: the chunk starts `i` lines into the file.
                for (const symbol of result.symbols) {
                    symbol.startLine += i;
                    symbol.endLine += i;
                }
                // Imports carry a chunk-relative `line` too and need the same shift.
                for (const importEntry of result.imports) {
                    importEntry.line += i;
                }
                allSymbols.push(...result.symbols);
                allImports.push(...result.imports);
                allExports.push(...result.exports);
                allDocstrings.push(...result.docstrings);
                allParseErrors.push(...result.parseErrors);
            } catch (error) {
                const errorMessage = error instanceof Error ? error.message : String(error);
                this.logger.warn(`Line chunk ${i} parsing failed: ${errorMessage}`);
                allParseErrors.push(`Lines ${i}-${i + chunkLines.length}: ${errorMessage}`);
            }
        }

        return {
            symbols: allSymbols,
            imports: allImports,
            exports: Array.from(new Set(allExports)),
            docstrings: allDocstrings,
            scopeGraph: { nodes: [], edges: [] },
            parseErrors: allParseErrors
        };
    }

    /**
     * CORE: AST node traversal.
     *
     * Processes `node`, then recurses into its named children with a copy of
     * the scope stack extended by 'class' or 'method' when `node` opens such a
     * scope. The stack passed in by the caller is never mutated.
     */
    async traverseASTNode(node, symbols, imports, exports, docstrings, parseErrors, language, scopeStack) {
        const nodeType = node.type;
        const startLine = node.startPosition.row + 1; // Tree-sitter rows are 0-based
        const endLine = node.endPosition.row + 1;

        // Process TypeScript/JavaScript nodes
        if (language === 'typescript' || language === 'javascript') {
            await this.processTypeScriptASTNode(node, symbols, imports, exports, scopeStack, startLine, endLine);
        }

        // Update scope stack for child traversal
        const newScopeStack = [...scopeStack];
        switch (nodeType) {
            case 'class_declaration':
                newScopeStack.push('class');
                break;
            case 'method_definition':
            case 'function_declaration':
            case 'arrow_function':
            case 'function_expression':
                newScopeStack.push('method');
                break;
        }

        // Recursively traverse all children with proper scope context
        for (const child of node.namedChildren) {
            await this.traverseASTNode(child, symbols, imports, exports, docstrings, parseErrors, language, newScopeStack);
        }
    }

    /**
     * Process one TypeScript/JavaScript AST node, appending to `symbols`,
     * `imports` and `exports` when the node is a declaration or import.
     * Node types not listed in the switch are ignored.
     */
    async processTypeScriptASTNode(node, symbols, imports, exports, scopeStack, startLine, endLine) {
        const nodeType = node.type;
        const isExported = this.isExportedNode(node);
        const currentScope = this.determineScope(scopeStack, isExported);

        // Record a named declaration; unnamed nodes are skipped.
        const recordSymbol = (name, type) => {
            if (!name) {
                return;
            }
            symbols.push({
                name,
                type,
                startLine,
                endLine,
                startColumn: node.startPosition.column,
                endColumn: node.endPosition.column,
                scope: currentScope
            });
            if (isExported) {
                exports.push(name);
            }
        };

        switch (nodeType) {
            // CLASS DECLARATIONS
            case 'class_declaration':
                recordSymbol(this.getNodeName(node), 'class');
                break;
            // INTERFACE DECLARATIONS
            case 'interface_declaration':
                recordSymbol(this.getNodeName(node), 'interface');
                break;
            // TYPE DECLARATIONS
            case 'type_alias_declaration':
                recordSymbol(this.getNodeName(node), 'type');
                break;
            // FUNCTION DECLARATIONS
            case 'function_declaration':
            case 'method_definition':
                // Only extract functions/methods that are not nested inside another function
                if (!this.isInMethodScope(scopeStack)) {
                    recordSymbol(this.getNodeName(node), nodeType === 'method_definition' ? 'method' : 'function');
                }
                break;
            // VARIABLE DECLARATIONS - CRITICAL FILTERING
            // TypeScript uses 'lexical_declaration' for const/let declarations
            case 'lexical_declaration':
            case 'variable_declaration':
                await this.processVariableDeclarationAST(node, symbols, exports, scopeStack, isExported, startLine, endLine);
                break;
            // IMPORT STATEMENTS
            case 'import_statement':
                await this.processImportAST(node, imports);
                break;
        }
    }

    /**
     * CRITICAL: Process variable declarations with AST-based semantic filtering.
     *
     * Declarations inside a function/method scope are skipped entirely.
     * Otherwise a declarator is recorded when the declaration is exported or
     * its initializer passes isVariableSemanticalleMeaningful(). Positions
     * recorded are those of the whole declaration statement.
     */
    async processVariableDeclarationAST(node, symbols, exports, scopeStack, isExported, startLine, endLine) {
        // CORE RULE: Never extract variables inside methods/functions
        if (this.isInMethodScope(scopeStack)) {
            return;
        }

        // Extract variable declarators from AST
        const declarators = node.namedChildren.filter(child => child.type === 'variable_declarator');
        for (const declarator of declarators) {
            const varName = this.getNodeName(declarator);
            if (!varName) {
                continue;
            }
            // SEMANTIC FILTERING: Only extract meaningful variables
            const shouldExtract = isExported || this.isVariableSemanticalleMeaningful(declarator);
            if (!shouldExtract) {
                continue;
            }
            const varType = this.getVariableTypeFromAST(node);
            const currentScope = this.determineScope(scopeStack, isExported);
            symbols.push({
                name: varName,
                type: varType,
                startLine,
                endLine,
                startColumn: node.startPosition.column,
                endColumn: node.endPosition.column,
                scope: currentScope
            });
            if (isExported) {
                exports.push(varName);
            }
        }
    }

    /**
     * Helper: Check if we're inside a method/function scope.
     *
     * @param {string[]} scopeStack
     * @returns {boolean}
     */
    isInMethodScope(scopeStack) {
        return scopeStack.includes('method') || scopeStack.includes('function');
    }

    /**
     * Helper: Determine if node is exported by checking AST structure.
     *
     * Walks up the parents looking for an `export_statement`, but stops at the
     * first class body, statement block or object literal: members nested in
     * an exported container are not module exports themselves.
     *
     * @returns {boolean}
     */
    isExportedNode(node) {
        for (let current = node.parent; current; current = current.parent) {
            if (current.type === 'export_statement') {
                return true;
            }
            if (EXPORT_BOUNDARY_NODE_TYPES.has(current.type)) {
                break;
            }
        }
        // Check for export keyword in children
        for (const child of node.children) {
            if (child.type === 'export' || child.text === 'export') {
                return true;
            }
        }
        return false;
    }

    /**
     * Helper: Get node name from AST structure.
     *
     * When the node has a `name` field, only that field is consulted; a name
     * that is not a plain identifier (destructuring pattern, computed or
     * string key) yields null rather than falling through to an unrelated
     * identifier such as the initializer. The child scan is used only for
     * nodes without a `name` field.
     *
     * @returns {string|null}
     */
    getNodeName(node) {
        const nameNode = node.childForFieldName('name');
        if (nameNode) {
            return NAME_NODE_TYPES.has(nameNode.type) ? nameNode.text : null;
        }
        // Fallback: find identifier child
        for (const child of node.namedChildren) {
            if (child.type === 'identifier' || child.type === 'type_identifier') {
                return child.text;
            }
        }
        return null;
    }

    /**
     * Helper: Check if variable is semantically meaningful using AST.
     * True when the declarator's initializer is an array, object, arrow
     * function, function expression, `new` expression or call expression.
     *
     * @returns {boolean}
     */
    isVariableSemanticalleMeaningful(declaratorNode) {
        const init = declaratorNode.childForFieldName('value');
        if (!init) {
            return false;
        }
        const initType = init.type;
        return (initType === 'array' ||
            initType === 'object' ||
            initType === 'arrow_function' ||
            initType === 'function_expression' ||
            initType === 'new_expression' ||
            initType === 'call_expression');
    }

    /**
     * Helper: Get variable type from AST.
     *
     * @returns {'constant'|'variable'} 'constant' for `const`, otherwise 'variable'.
     */
    getVariableTypeFromAST(node) {
        for (const child of node.children) {
            if (child.text === 'const') {
                return 'constant';
            }
            if (child.text === 'let' || child.text === 'var') {
                return 'variable';
            }
        }
        return 'variable';
    }

    /**
     * Helper: Determine scope based on context.
     *
     * @returns {'export'|'local'|'global'}
     */
    determineScope(scopeStack, isExported) {
        if (isExported) {
            return 'export';
        }
        if (scopeStack.length > 0) {
            return 'local';
        }
        return 'global';
    }

    /**
     * Helper: Process import statements using AST.
     * Records the module specifier (quotes stripped), the 1-based line and the
     * statement text; imported symbol names are not resolved.
     */
    async processImportAST(node, imports) {
        const source = node.childForFieldName('source');
        if (source) {
            imports.push({
                module: source.text.replace(/['"]/g, ''),
                symbols: [], // Would need more detailed processing
                isDefault: false,
                isNamespace: false,
                line: node.startPosition.row + 1,
                source: node.text
            });
        }
    }

    /**
     * Get extraction statistics.
     *
     * @returns {{initialized: boolean, supportedLanguages: string[], availableParsers: string[]}}
     */
    getStats() {
        return {
            initialized: this.initialized,
            supportedLanguages: ['typescript', 'javascript'],
            availableParsers: Array.from(this.parsers.keys())
        };
    }
}
//# sourceMappingURL=TreeSitterSymbolExtractor.treesitter-based.js.map