@wildcard-ai/deepcodex
Version:
Advanced codebase indexing and semantic search MCP server
409 lines • 16.3 kB
JavaScript
/**
* TreeSitterSymbolExtractor - FULL Tree-sitter AST Implementation
*
* This is the proper Tree-sitter implementation with real AST parsing,
* replacing the regex-based fallback approach with true structural understanding.
*
* Features:
* - Real Tree-sitter parsers for TypeScript/JavaScript
* - AST-based scope detection and symbol extraction
* - Semantic understanding of code structure
* - No regex patterns or manual brace counting
*/
import { Logger } from '../../utils/Logger.js';
// Tree-sitter bindings. All three are assigned together by loadTreeSitter(),
// so they are either all populated or all still undefined.
let Parser;
let TypeScriptLanguage;
let JavaScriptLanguage;
/**
 * Lazily load the Tree-sitter runtime and the TypeScript/JavaScript grammars.
 *
 * The modules are imported on first use rather than at module load time to
 * handle import issues. Results are first collected in locals and only
 * published to the module-level bindings once every import has succeeded;
 * a failed import therefore leaves nothing half-initialised and the next
 * call retries the whole load instead of silently skipping it.
 *
 * @returns {Promise<void>} Resolves once Parser, TypeScriptLanguage and
 *   JavaScriptLanguage are all set.
 * @throws Whatever the dynamic imports throw (for example a missing package).
 */
async function loadTreeSitter() {
    if (Parser && TypeScriptLanguage && JavaScriptLanguage) {
        return;
    }
    const parserModule = await import('tree-sitter');
    const tsModule = await import('tree-sitter-typescript');
    const jsModule = await import('tree-sitter-javascript');
    const loadedParser = parserModule.default;
    const loadedTypeScript = tsModule.default.typescript;
    const loadedJavaScript = jsModule.default;
    // Publish only after everything above succeeded.
    Parser = loadedParser;
    TypeScriptLanguage = loadedTypeScript;
    JavaScriptLanguage = loadedJavaScript;
}
/**
 * Extracts symbols, imports and exports from TypeScript/JavaScript source
 * by walking a Tree-sitter AST.
 *
 * Typical use: `await extractor.extractSymbols(content, language, filePath)`.
 * Parsers are created lazily on the first call.
 */
export class TreeSitterSymbolExtractorFull {
    /**
     * Node types accepted as the `name` of a declaration. Class-member names
     * are `property_identifier` / `private_property_identifier` nodes in the
     * Tree-sitter JavaScript grammar, so they must be listed for methods to
     * be named at all.
     */
    static #NAME_NODE_TYPES = new Set([
        'identifier',
        'type_identifier',
        'property_identifier',
        'private_property_identifier',
    ]);
    /**
     * Ancestor types that end the search for an enclosing `export_statement`:
     * anything declared inside one of these bodies is a member or a local of
     * the exported declaration, not a module export itself.
     */
    static #EXPORT_BARRIER_TYPES = new Set(['class_body', 'statement_block', 'object']);
    /** Maps declaration node types to the symbol `type` that is emitted. */
    static #DECLARATION_KINDS = new Map([
        ['class_declaration', 'class'],
        ['interface_declaration', 'interface'],
        ['type_alias_declaration', 'type'],
        ['function_declaration', 'function'],
        ['method_definition', 'method'],
    ]);
    /** Initializer node types that make a non-exported variable worth extracting. */
    static #MEANINGFUL_INITIALIZERS = new Set([
        'array',
        'object',
        'arrow_function',
        'function_expression',
        'new_expression',
        'call_expression',
    ]);
    parsers = new Map();
    initialized = false;
    logger;
    constructor() {
        this.logger = new Logger('TREESITTER-FULL', 'info');
    }
    /**
     * Create the TypeScript and JavaScript parsers. Does nothing when a
     * previous call already succeeded.
     *
     * @returns {Promise<void>}
     * @throws Re-throws any error from loading Tree-sitter or configuring a parser.
     */
    async initialize() {
        if (this.initialized) {
            return;
        }
        try {
            await loadTreeSitter();
            const tsParser = new Parser();
            tsParser.setLanguage(TypeScriptLanguage);
            this.parsers.set('typescript', tsParser);
            const jsParser = new Parser();
            jsParser.setLanguage(JavaScriptLanguage);
            this.parsers.set('javascript', jsParser);
            this.initialized = true;
            this.logger.info('✅ Tree-sitter parsers initialized successfully');
        }
        catch (error) {
            this.logger.error(`❌ Tree-sitter initialization failed: ${error}`);
            throw error;
        }
    }
    /**
     * Extract symbols from source text, parsing it in one go when it is small
     * enough and in line-based chunks otherwise.
     *
     * @param {string} content Source text.
     * @param {string} language Key of a registered parser ('typescript' or 'javascript').
     * @param {string} filePath Used for log messages only.
     * @returns {Promise<object>} `{ symbols, imports, exports, docstrings, scopeGraph, parseErrors }`.
     * @throws {Error} When no parser is registered for `language`, or when parsing fails.
     */
    async extractSymbols(content, language, filePath) {
        await this.initialize();
        if (!this.parsers.has(language)) {
            throw new Error(`Unsupported language: ${language}`);
        }
        const parser = this.parsers.get(language);
        try {
            // Threshold for Tree-sitter's 32KB input limit. Note that
            // content.length counts UTF-16 code units, not bytes.
            const TREESITTER_LIMIT = 32768; // 2^15
            if (content.length <= TREESITTER_LIMIT) {
                return await this.parseContentDirectly(parser, content, language, filePath);
            }
            return await this.parseContentWithChunking(parser, content, language, filePath);
        }
        catch (error) {
            this.logger.error(`AST parsing failed for ${filePath}: ${error}`);
            throw error;
        }
    }
    /**
     * Parse `content` with a single parser call and walk the resulting tree.
     *
     * @returns {Promise<object>} Extraction result; `docstrings`, `parseErrors`
     *   and the scope graph are returned empty by this implementation.
     */
    async parseContentDirectly(parser, content, language, filePath) {
        const tree = parser.parse(content);
        const symbols = [];
        const imports = [];
        const exports = [];
        const docstrings = [];
        const parseErrors = [];
        await this.traverseASTNode(tree.rootNode, symbols, imports, exports, docstrings, parseErrors, language, []);
        this.logger.info(`Extracted ${symbols.length} symbols from ${filePath}`);
        return { symbols, imports, exports, docstrings, scopeGraph: { nodes: [], edges: [] }, parseErrors };
    }
    /**
     * Parse content that is too large for a single parser call.
     * Currently delegates to the line-based chunking strategy.
     */
    async parseContentWithChunking(parser, content, language, filePath) {
        return await this.parseWithLineBoundaries(parser, content, language, filePath);
    }
    /**
     * Split `content` into fixed-size runs of lines, parse each run on its
     * own and merge the results.
     *
     * Line numbers reported by a chunk are relative to that chunk, so both
     * symbol positions and import lines are shifted by the chunk's starting
     * line before merging. A chunk that fails to parse is logged and recorded
     * in `parseErrors`; the remaining chunks are still processed.
     *
     * @returns {Promise<object>} Merged extraction result with de-duplicated exports.
     */
    async parseWithLineBoundaries(parser, content, language, filePath) {
        const lines = content.split('\n');
        const LINES_PER_CHUNK = 500; // Small enough to be safe
        const allSymbols = [];
        const allImports = [];
        const allExports = [];
        const allDocstrings = [];
        const allParseErrors = [];
        for (let offset = 0; offset < lines.length; offset += LINES_PER_CHUNK) {
            const chunkLines = lines.slice(offset, offset + LINES_PER_CHUNK);
            const chunkEnd = offset + chunkLines.length;
            try {
                const result = await this.parseContentDirectly(parser, chunkLines.join('\n'), language, `${filePath}:lines${offset}-${chunkEnd}`);
                // Rebase chunk-relative line numbers onto the whole file.
                for (const symbol of result.symbols) {
                    symbol.startLine += offset;
                    symbol.endLine += offset;
                }
                for (const importEntry of result.imports) {
                    importEntry.line += offset;
                }
                allSymbols.push(...result.symbols);
                allImports.push(...result.imports);
                allExports.push(...result.exports);
                allDocstrings.push(...result.docstrings);
                allParseErrors.push(...result.parseErrors);
            }
            catch (error) {
                const errorMessage = error instanceof Error ? error.message : String(error);
                this.logger.warn(`Line chunk ${offset} parsing failed: ${errorMessage}`);
                allParseErrors.push(`Lines ${offset}-${chunkEnd}: ${errorMessage}`);
            }
        }
        return {
            symbols: allSymbols,
            imports: allImports,
            exports: Array.from(new Set(allExports)),
            docstrings: allDocstrings,
            scopeGraph: { nodes: [], edges: [] },
            parseErrors: allParseErrors
        };
    }
    /**
     * Depth-first walk over the AST. Each node is offered to the
     * TypeScript/JavaScript processor, then its named children are visited
     * with a scope stack that records enclosing classes ('class') and
     * functions/methods ('method').
     *
     * `docstrings` and `parseErrors` are only passed through to children.
     */
    async traverseASTNode(node, symbols, imports, exports, docstrings, parseErrors, language, scopeStack) {
        if (language === 'typescript' || language === 'javascript') {
            await this.processTypeScriptASTNode(node, symbols, imports, exports, scopeStack, node.startPosition.row + 1, node.endPosition.row + 1);
        }
        let scopeLabel = null;
        switch (node.type) {
            case 'class_declaration':
                scopeLabel = 'class';
                break;
            case 'method_definition':
            case 'function_declaration':
            case 'arrow_function':
            case 'function_expression':
                scopeLabel = 'method';
                break;
        }
        // The stack is never mutated in place, so it is only copied when this
        // node actually opens a new scope.
        const childScopeStack = scopeLabel ? [...scopeStack, scopeLabel] : scopeStack;
        for (const child of node.namedChildren) {
            await this.traverseASTNode(child, symbols, imports, exports, docstrings, parseErrors, language, childScopeStack);
        }
    }
    /**
     * Turn a single AST node into a symbol and/or import entry when it is a
     * declaration this extractor knows about; every other node type is ignored.
     *
     * Functions and methods nested inside another function/method are skipped.
     */
    async processTypeScriptASTNode(node, symbols, imports, exports, scopeStack, startLine, endLine) {
        const nodeType = node.type;
        if (nodeType === 'import_statement') {
            await this.processImportAST(node, imports);
            return;
        }
        if (nodeType === 'lexical_declaration' || nodeType === 'variable_declaration') {
            // 'lexical_declaration' covers const/let, 'variable_declaration' covers var.
            await this.processVariableDeclarationAST(node, symbols, exports, scopeStack, this.isExportedNode(node), startLine, endLine);
            return;
        }
        const symbolType = TreeSitterSymbolExtractorFull.#DECLARATION_KINDS.get(nodeType);
        if (symbolType === undefined) {
            return;
        }
        const isCallable = symbolType === 'function' || symbolType === 'method';
        if (isCallable && this.isInMethodScope(scopeStack)) {
            return;
        }
        const name = this.getNodeName(node);
        if (!name) {
            return;
        }
        // The export check is only evaluated for nodes that can produce a symbol.
        const isExported = this.isExportedNode(node);
        symbols.push({
            name,
            type: symbolType,
            startLine,
            endLine,
            startColumn: node.startPosition.column,
            endColumn: node.endPosition.column,
            scope: this.determineScope(scopeStack, isExported)
        });
        if (isExported) {
            exports.push(name);
        }
    }
    /**
     * Emit symbols for the declarators of a const/let/var declaration.
     *
     * Declarations inside a function or method are ignored entirely. Outside
     * of those, a declarator is kept when the declaration is exported or when
     * its initializer is one of the "meaningful" node types. Positions are
     * those of the whole declaration, not of the individual declarator.
     */
    async processVariableDeclarationAST(node, symbols, exports, scopeStack, isExported, startLine, endLine) {
        if (this.isInMethodScope(scopeStack)) {
            return;
        }
        const scope = this.determineScope(scopeStack, isExported);
        for (const declarator of node.namedChildren) {
            if (declarator.type !== 'variable_declarator') {
                continue;
            }
            const varName = this.getNodeName(declarator);
            if (!varName) {
                continue;
            }
            if (!isExported && !this.isVariableSemanticalleMeaningful(declarator)) {
                continue;
            }
            symbols.push({
                name: varName,
                type: this.getVariableTypeFromAST(node),
                startLine,
                endLine,
                startColumn: node.startPosition.column,
                endColumn: node.endPosition.column,
                scope
            });
            if (isExported) {
                exports.push(varName);
            }
        }
    }
    /**
     * @param {string[]} scopeStack Labels pushed by traverseASTNode.
     * @returns {boolean} True when any enclosing scope is a function or method.
     */
    isInMethodScope(scopeStack) {
        return scopeStack.includes('method') || scopeStack.includes('function');
    }
    /**
     * Decide whether a declaration node is itself exported from the module.
     *
     * Ancestors are followed upward until an `export_statement` is found, but
     * the walk stops at the first class body, statement block or object
     * literal: members and locals of an exported declaration are not module
     * exports. A direct `export` child token also counts.
     *
     * @returns {boolean}
     */
    isExportedNode(node) {
        for (let ancestor = node.parent; ancestor; ancestor = ancestor.parent) {
            if (ancestor.type === 'export_statement') {
                return true;
            }
            if (TreeSitterSymbolExtractorFull.#EXPORT_BARRIER_TYPES.has(ancestor.type)) {
                break;
            }
        }
        return node.children.some((child) => child.type === 'export');
    }
    /**
     * Resolve the declared name of a node.
     *
     * When the node has a `name` field, only that field is consulted: it is
     * returned if it is an identifier-like node and `null` otherwise (for
     * example destructuring patterns or computed member names). Scanning
     * the other children in that case could pick up an unrelated identifier
     * such as a variable's initializer. Nodes without a `name` field fall
     * back to their first identifier child.
     *
     * @returns {string|null}
     */
    getNodeName(node) {
        const nameNode = node.childForFieldName('name');
        if (nameNode) {
            return TreeSitterSymbolExtractorFull.#NAME_NODE_TYPES.has(nameNode.type) ? nameNode.text : null;
        }
        for (const child of node.namedChildren) {
            if (child.type === 'identifier' || child.type === 'type_identifier') {
                return child.text;
            }
        }
        return null;
    }
    /**
     * @param {object} declaratorNode A `variable_declarator` node.
     * @returns {boolean} True when the declarator has an initializer whose
     *   node type is an array, object, function, `new` or call expression.
     */
    isVariableSemanticalleMeaningful(declaratorNode) {
        const init = declaratorNode.childForFieldName('value');
        if (!init) {
            return false;
        }
        return TreeSitterSymbolExtractorFull.#MEANINGFUL_INITIALIZERS.has(init.type);
    }
    /**
     * Classify a declaration by its keyword.
     *
     * @returns {'constant'|'variable'} 'constant' for `const`, otherwise 'variable'.
     */
    getVariableTypeFromAST(node) {
        for (const child of node.children) {
            if (child.text === 'const') {
                return 'constant';
            }
            if (child.text === 'let' || child.text === 'var') {
                return 'variable';
            }
        }
        return 'variable';
    }
    /**
     * @returns {'export'|'local'|'global'} 'export' for exported declarations,
     *   'local' when inside any recorded scope, 'global' otherwise.
     */
    determineScope(scopeStack, isExported) {
        if (isExported) {
            return 'export';
        }
        return scopeStack.length > 0 ? 'local' : 'global';
    }
    /**
     * Record an import statement. Only the module specifier, the line and
     * the raw statement text are captured; imported names are not resolved,
     * so `symbols` is empty and `isDefault` / `isNamespace` are always false.
     */
    async processImportAST(node, imports) {
        const source = node.childForFieldName('source');
        if (!source) {
            return;
        }
        imports.push({
            module: source.text.replace(/['"]/g, ''),
            symbols: [], // Would need more detailed processing
            isDefault: false,
            isNamespace: false,
            line: node.startPosition.row + 1,
            source: node.text
        });
    }
    /**
     * @returns {{initialized: boolean, supportedLanguages: string[], availableParsers: string[]}}
     *   Current initialisation state and the languages with a registered parser.
     */
    getStats() {
        return {
            initialized: this.initialized,
            supportedLanguages: ['typescript', 'javascript'],
            availableParsers: Array.from(this.parsers.keys())
        };
    }
}
//# sourceMappingURL=TreeSitterSymbolExtractor.treesitter-based.js.map