UNPKG

hikma-engine

Version:

Code Knowledge Graph Indexer - A sophisticated TypeScript-based indexer that transforms Git repositories into multi-dimensional knowledge stores for AI agents

430 lines (429 loc) 18.3 kB
"use strict";
/**
 * @file Responsible for parsing AST of supported languages and extracting code structure.
 *       It identifies and creates various node types (CodeNode, FileNode,
 *       TestNode, FunctionNode) and their relationships (edges).
 *       Polyglot AST parser supporting multiple languages.
 */
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
      desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    o["default"] = v;
});
var __importStar = (this && this.__importStar) || (function () {
    var ownKeys = function(o) {
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var result = {};
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        __setModuleDefault(result, mod);
        return result;
    };
})();
Object.defineProperty(exports, "__esModule", { value: true });
exports.AstParser = void 0;
const logger_1 = require("../utils/logger");
const error_handling_1 = require("../utils/error-handling");
const fs = __importStar(require("fs"));
const path = __importStar(require("path"));
const ts = __importStar(require("typescript"));
/**
 * Walks TypeScript/JavaScript sources with the TypeScript compiler API and
 * accumulates graph nodes (FunctionNode, CodeNode, TestNode) and edges
 * (CALLS, DEFINED_IN) in memory. Results are read back through
 * {@link AstParser#getNodes} and {@link AstParser#getEdges}.
 */
class AstParser {
    /**
     * @param {string} projectRoot - The absolute path to the root of the project.
     * @param {ConfigManager} config - Configuration manager instance.
     * @param {string} repoId - The repository ID for foreign key relationships.
     */
    constructor(projectRoot, config, repoId) {
        this.logger = (0, logger_1.getLogger)('AstParser');
        this.nodes = [];
        this.edges = [];
        this.pathToIdMap = new Map();
        this.projectRoot = projectRoot;
        this.config = config;
        this.repoId = repoId;
        this.logger.info('AstParser initialized', { projectRoot, repoId });
    }
    /**
     * @returns {object[]} The live array of nodes collected by the last parseFiles() run.
     */
    getNodes() {
        return this.nodes;
    }
    /**
     * @returns {object[]} The live array of edges collected by the last parseFiles() run.
     */
    getEdges() {
        return this.edges;
    }
    /**
     * Parses the given files and extracts nodes and edges.
     *
     * Previously collected nodes and edges are discarded first. Files whose
     * detected language is not TypeScript/JavaScript are skipped (debug-logged).
     * A failure while parsing a single file is logged as a warning and does not
     * abort the run; any other failure is logged and rethrown.
     *
     * @param {string[]} filePaths - Files to parse.
     * @param {Map<string,string>} idMap - Maps a file path to its FileNode id.
     * @returns {Promise<void>}
     */
    async parseFiles(filePaths, idMap = new Map()) {
        const op = this.logger.operation('AST parsing');
        try {
            this.logger.info(`Starting AST parsing for ${filePaths.length} files`);
            /* reset state */
            this.nodes = [];
            this.edges = [];
            this.pathToIdMap = idMap;
            /* basic file/dir nodes first */
            this.createFileNodes(filePaths);
            /* per-file detailed parsing */
            for (const filePath of filePaths) {
                const lang = this.detectLanguage(filePath);
                try {
                    switch (lang) {
                        case 'typescript':
                        case 'javascript':
                            await this.parseTypeScriptFile(filePath);
                            break;
                        default:
                            this.logger.debug(`Skipping detailed parsing for ${lang}: ${filePath}`);
                    }
                }
                catch (err) {
                    // Best effort: one unreadable/unparsable file must not abort the whole run.
                    this.logger.warn(`Failed to parse file: ${filePath}`, { error: (0, error_handling_1.getErrorMessage)(err) });
                }
            }
            /* relationships & call graph */
            this.createEdges();
            this.buildCallGraph();
            this.logger.info('AST parsing completed', {
                totalNodes: this.nodes.length,
                totalEdges: this.edges.length,
                nodeTypes: this.getNodeTypeStats(),
            });
        }
        catch (err) {
            this.logger.error('AST parsing failed', { error: (0, error_handling_1.getErrorMessage)(err) });
            throw err;
        }
        finally {
            // Close the logger operation exactly once, on success and on failure.
            op();
        }
    }
    /* ------------------------------------------------------------------ */
    /* Helpers                                                            */
    /* ------------------------------------------------------------------ */
    /**
     * Heuristically decides whether a path looks like a test file by matching
     * the lower-cased path against common test/spec naming patterns.
     *
     * @param {string} filePath - Path to check.
     * @returns {boolean} True when any pattern matches.
     */
    isTestFile(filePath) {
        const patterns = [
            /\.test\./,
            /\.spec\./,
            /_test\./,
            /_spec\./,
            /test_/i,
            /spec_/i,
            /tests?\//i,
            /specs?\//i,
            /__tests?__/i,
            /__specs?__/i,
        ];
        const lowered = filePath.toLowerCase();
        return patterns.some(p => p.test(lowered));
    }
    /**
     * Maps a file extension (case-insensitive) to a language name.
     *
     * @param {string} filePath - Path whose extension is inspected.
     * @returns {string} The language name, or 'unknown' for unmapped extensions.
     */
    detectLanguage(filePath) {
        const ext = path.extname(filePath).toLowerCase();
        const map = {
            '.ts': 'typescript',
            '.tsx': 'typescript',
            '.js': 'javascript',
            '.jsx': 'javascript',
            '.py': 'python',
            '.java': 'java',
            '.go': 'go',
            '.c': 'c',
            '.cc': 'cpp',
            '.cxx': 'cpp',
            '.cpp': 'cpp',
            '.h': 'c',
            '.hpp': 'cpp',
            '.cs': 'csharp',
            '.rb': 'ruby',
            '.php': 'php',
            '.html': 'html',
            '.css': 'css',
            '.scss': 'scss',
            '.less': 'less',
            '.json': 'json',
            '.xml': 'xml',
            '.yaml': 'yaml',
            '.yml': 'yaml',
            '.md': 'markdown',
            '.rst': 'restructuredtext',
        };
        return map[ext] || 'unknown';
    }
    /**
     * Builds a node id. Known node types get a deterministic id derived from
     * their location; any other type gets a non-deterministic id based on the
     * current time and Math.random().
     *
     * @param {string} type - Node type name (e.g. 'FunctionNode').
     * @param {object} props - Location properties: filePath, name, startLine, startColumn.
     * @returns {string} The node id.
     */
    createNodeId(type, props) {
        switch (type) {
            case 'FileNode':
                return `file:${props.filePath}`;
            case 'CodeNode':
                return `code:${props.filePath}:${props.name}:${props.startLine}:${props.startColumn || 0}`;
            case 'TestNode':
                return `test:${props.filePath}:${props.name}:${props.startLine}:${props.startColumn || 0}`;
            case 'FunctionNode':
                return `func:${props.filePath}:${props.name}:${props.startLine}:${props.startColumn || 0}`;
            default:
                return `${type.toLowerCase()}:${Date.now()}:${Math.random()}`;
        }
    }
    /* ------------------------------------------------------------------ */
    /* TypeScript / JavaScript parsing                                    */
    /* ------------------------------------------------------------------ */
    /**
     * Reads one TypeScript/JavaScript file, walks its AST and records
     * FunctionNodes, CodeNodes (classes, interfaces) and, for test files,
     * TestNodes. CALLS edges are extracted only after the whole file has been
     * visited, so that calls to functions declared later in the same file can
     * be resolved as well.
     *
     * @param {string} filePath - File to read and parse.
     * @returns {Promise<void>}
     */
    async parseTypeScriptFile(filePath) {
        const content = await fs.promises.readFile(filePath, 'utf-8');
        const sourceFile = ts.createSourceFile(filePath, content, ts.ScriptTarget.Latest, true);
        const inTestFile = this.isTestFile(filePath);
        // Function-like AST nodes whose bodies still have to be scanned for calls.
        const pendingCallScans = [];
        const visit = (node) => {
            // Functions (incl. arrow, methods)
            if (ts.isFunctionDeclaration(node) || ts.isMethodDeclaration(node) || ts.isArrowFunction(node)) {
                const func = this.extractFunctionDetails(node, filePath);
                if (func) {
                    this.nodes.push(func);
                    pendingCallScans.push({ node, callerId: func.id });
                }
            }
            // Classes, interfaces remain CodeNode
            else if (ts.isClassDeclaration(node)) {
                this.extractCodeNode(node, filePath, 'class');
            }
            else if (ts.isInterfaceDeclaration(node)) {
                this.extractCodeNode(node, filePath, 'interface');
            }
            // Test functions -> TestNode (in addition to the FunctionNode above)
            if (inTestFile && (ts.isFunctionDeclaration(node) || ts.isMethodDeclaration(node))) {
                this.extractTestNode(node, filePath);
            }
            ts.forEachChild(node, visit);
        };
        visit(sourceFile);
        // Resolve calls now that every function of this file is present in this.nodes.
        for (const { node, callerId } of pendingCallScans) {
            this.extractFunctionCalls(node, callerId, sourceFile);
        }
    }
    /* ------------------------------------------------------------------ */
    /* Extractors                                                         */
    /* ------------------------------------------------------------------ */
    /**
     * Builds a FunctionNode for a function declaration, method declaration or
     * arrow function. The node is returned, not stored.
     *
     * The signature is the source text from the start of the node up to its
     * body (or up to the `=>` token for arrow functions). It is sliced using
     * AST positions so that braces or arrows inside the parameter list
     * (destructuring, object types, arrow-valued defaults) do not truncate it.
     *
     * @param {ts.Node} node - Candidate function-like node.
     * @param {string} filePath - Path of the file containing the node.
     * @returns {object|null} The FunctionNode, or null for unsupported node kinds.
     */
    extractFunctionDetails(node, filePath) {
        const sf = node.getSourceFile();
        const nodeStart = node.getStart();
        const start = sf.getLineAndCharacterOfPosition(nodeStart);
        const end = sf.getLineAndCharacterOfPosition(node.getEnd());
        let name = 'anonymous';
        let signature = '';
        let returnType = 'any';
        let accessLevel = 'public';
        if (ts.isFunctionDeclaration(node) || ts.isMethodDeclaration(node)) {
            name = node.name?.getText() || 'anonymous';
            // Overloads / ambient declarations have no body: keep the whole text.
            const headerEnd = node.body ? node.body.getStart() : node.getEnd();
            signature = sf.text.slice(nodeStart, headerEnd).trim();
            if (node.type) {
                returnType = node.type.getText();
            }
            if (node.modifiers) {
                for (const m of node.modifiers) {
                    if (m.kind === ts.SyntaxKind.PrivateKeyword) {
                        accessLevel = 'private';
                    }
                    else if (m.kind === ts.SyntaxKind.ProtectedKeyword) {
                        accessLevel = 'protected';
                    }
                }
            }
        }
        else if (ts.isArrowFunction(node)) {
            // Arrow functions are always recorded under the name 'anonymous'.
            signature = sf.text.slice(nodeStart, node.equalsGreaterThanToken.getStart()).trim();
            if (node.type) {
                returnType = node.type.getText();
            }
        }
        else {
            return null;
        }
        const startLine = start.line + 1; // compiler positions are 0-based
        const func = {
            id: this.createNodeId('FunctionNode', { filePath, name, startLine, startColumn: start.character }),
            type: 'FunctionNode',
            properties: {
                name,
                signature,
                returnType,
                accessLevel,
                filePath,
                fileId: this.pathToIdMap.get(filePath) || '',
                startLine,
                endLine: end.line + 1,
                body: node.getText(),
                // Call-graph fields are filled in later by buildCallGraph().
                callsMethods: [],
                calledByMethods: [],
                usesExternalMethods: false,
                internalCallGraph: [],
                transitiveCallDepth: 0,
            },
        };
        return func;
    }
    /**
     * Creates a CodeNode for a class or interface declaration and appends it
     * to this.nodes.
     *
     * @param {ts.Node} node - Class or interface declaration.
     * @param {string} filePath - Path of the file containing the node.
     * @param {string} nodeType - 'class' or 'interface'; not read by this method,
     *   the kind is derived from the node itself.
     * @returns {void}
     */
    extractCodeNode(node, filePath, nodeType) {
        const sf = node.getSourceFile();
        const start = sf.getLineAndCharacterOfPosition(node.getStart());
        const end = sf.getLineAndCharacterOfPosition(node.getEnd());
        let name = 'anonymous';
        let signature = '';
        if (ts.isClassDeclaration(node)) {
            name = node.name?.getText() || 'anonymous';
            signature = `class ${name}`;
        }
        else if (ts.isInterfaceDeclaration(node)) {
            name = node.name?.getText() || 'anonymous';
            signature = `interface ${name}`;
        }
        const startLine = start.line + 1;
        const cn = {
            id: this.createNodeId('CodeNode', { filePath, name, startLine, startColumn: start.character }),
            type: 'CodeNode',
            properties: {
                name,
                signature,
                body: node.getText(),
                language: this.detectLanguage(filePath),
                filePath,
                startLine,
                endLine: end.line + 1,
            },
        };
        this.nodes.push(cn);
    }
    /**
     * Creates a TestNode for a function or method declaration found in a test
     * file and appends it to this.nodes.
     *
     * @param {ts.Node} node - Function or method declaration.
     * @param {string} filePath - Path of the test file.
     * @returns {void}
     */
    extractTestNode(node, filePath) {
        const sf = node.getSourceFile();
        const start = sf.getLineAndCharacterOfPosition(node.getStart());
        const end = sf.getLineAndCharacterOfPosition(node.getEnd());
        const name = (ts.isFunctionDeclaration(node) || ts.isMethodDeclaration(node))
            ? node.name?.getText() || 'anonymous test'
            : 'anonymous test';
        const startLine = start.line + 1;
        const tn = {
            id: this.createNodeId('TestNode', { filePath, name, startLine, startColumn: start.character }),
            type: 'TestNode',
            properties: {
                name,
                filePath,
                startLine,
                endLine: end.line + 1,
                framework: this.detectTestFramework(filePath),
                testBody: node.getText(),
            },
        };
        this.nodes.push(tn);
    }
    /**
     * Guesses the test framework from the file's base name only: the first of
     * 'jest', 'mocha', 'jasmine', 'vitest' contained in the lower-cased base
     * name wins. File contents are not inspected.
     *
     * @param {string} filePath - Path of the test file.
     * @returns {string} Framework name or 'unknown'.
     */
    detectTestFramework(filePath) {
        const n = path.basename(filePath).toLowerCase();
        if (n.includes('jest')) {
            return 'jest';
        }
        if (n.includes('mocha')) {
            return 'mocha';
        }
        if (n.includes('jasmine')) {
            return 'jasmine';
        }
        if (n.includes('vitest')) {
            return 'vitest';
        }
        return 'unknown';
    }
    /**
     * Classifies a file as 'test', 'vendor', 'config', 'dev' or 'source' using
     * substring checks on the lower-cased path, evaluated in that order.
     *
     * @param {string} filePath - Path to classify.
     * @returns {string} The file category.
     */
    determineFileType(filePath) {
        const lowerPath = filePath.toLowerCase();
        // Check for test files
        if (this.isTestFile(filePath)) {
            return 'test';
        }
        // Check for vendor/third-party files
        if (lowerPath.includes('node_modules') || lowerPath.includes('vendor') || lowerPath.includes('third_party')) {
            return 'vendor';
        }
        // Check for config files
        const configPatterns = [
            'package.json', 'tsconfig.json', 'jest.config', 'webpack.config',
            '.eslintrc', '.prettierrc', '.gitignore', '.env',
            'dockerfile', 'makefile', 'cmake', '.yml', '.yaml', '.toml', '.ini'
        ];
        if (configPatterns.some(pattern => lowerPath.includes(pattern))) {
            return 'config';
        }
        // Check for development/build files
        const devPatterns = ['gulpfile', 'gruntfile', 'rollup.config', 'vite.config'];
        if (devPatterns.some(pattern => lowerPath.includes(pattern))) {
            return 'dev';
        }
        // Default to source
        return 'source';
    }
    /**
     * Produces a short hexadecimal hash of the file PATH (a 31-multiplier
     * rolling hash truncated to 32 bits). The file content is not read.
     *
     * @param {string} filePath - Path to hash.
     * @returns {string} Hexadecimal string of the absolute hash value.
     */
    generateContentHash(filePath) {
        // Simple hash generation based on file path
        // In a real implementation, this would hash the actual file content
        let hash = 0;
        for (let i = 0; i < filePath.length; i++) {
            const char = filePath.charCodeAt(i);
            hash = ((hash << 5) - hash) + char;
            hash = hash & hash; // Convert to 32-bit integer
        }
        return Math.abs(hash).toString(16);
    }
    /* ------------------------------------------------------------------ */
    /* File nodes only - directories removed                              */
    /* ------------------------------------------------------------------ */
    /**
     * Intentionally a no-op, kept so existing callers keep working.
     *
     * @param {string[]} filePaths - Unused.
     * @returns {void}
     */
    createFileNodes(filePaths) {
        // FileNode creation is now handled by NodeCreator in the indexing pipeline
        // This method is no longer needed but kept for compatibility
    }
    /**
     * Adds a DEFINED_IN edge from every CodeNode/TestNode to the FileNode whose
     * path, resolved against projectRoot, equals the node's filePath.
     *
     * NOTE(review): nothing in this class pushes FileNode entries into
     * this.nodes any more (createFileNodes is a no-op), so unless a caller adds
     * them through getNodes() this produces no edges - confirm whether
     * DEFINED_IN edges are created elsewhere in the pipeline.
     *
     * @returns {void}
     */
    createEdges() {
        const files = this.nodes.filter(n => n.type === 'FileNode');
        const codes = this.nodes.filter(n => n.type === 'CodeNode');
        const tests = this.nodes.filter(n => n.type === 'TestNode');
        // Code/Test -> File
        [...codes, ...tests].forEach(n => {
            const f = files.find(fi => path.resolve(this.projectRoot, fi.properties.filePath) === n.properties.filePath);
            if (f) {
                this.edges.push({ source: n.id, target: f.id, type: 'DEFINED_IN' });
            }
        });
        // Note: Directory relationships are no longer created since DirectoryNode is removed
    }
    /* ------------------------------------------------------------------ */
    /* Call-graph helpers                                                 */
    /* ------------------------------------------------------------------ */
    /**
     * Returns the name under which a call target is looked up. For a call on
     * `this` (e.g. `this.foo()`) that is the member name, because methods are
     * recorded as FunctionNodes under their bare name; for every other callee
     * it is the expression's full source text.
     *
     * @param {ts.Expression} expression - The callee expression of a call.
     * @returns {string} The lookup name.
     */
    resolveCalleeName(expression) {
        if (ts.isPropertyAccessExpression(expression) &&
            expression.expression.kind === ts.SyntaxKind.ThisKeyword) {
            return expression.name.getText();
        }
        return expression.getText();
    }
    /**
     * Scans the descendants of a function-like node for call expressions and
     * adds a CALLS edge for every call whose callee name matches a FunctionNode
     * already recorded for the same file (first recorded function with that
     * name wins). Calls inside nested functions are attributed to this caller
     * too, since the scan does not stop at nested function boundaries.
     *
     * @param {ts.Node} node - The function-like node to scan.
     * @param {string} callerId - Id of the FunctionNode representing `node`.
     * @param {ts.SourceFile} sf - Source file containing `node`.
     * @returns {void}
     */
    extractFunctionCalls(node, callerId, sf) {
        // Index the file's functions by name once instead of searching all nodes per call.
        const functionsByName = new Map();
        for (const candidate of this.nodes) {
            if (candidate.type === 'FunctionNode' &&
                candidate.properties.filePath === sf.fileName &&
                !functionsByName.has(candidate.properties.name)) {
                functionsByName.set(candidate.properties.name, candidate);
            }
        }
        const visit = (n) => {
            if (ts.isCallExpression(n)) {
                const callee = functionsByName.get(this.resolveCalleeName(n.expression));
                if (callee) {
                    this.edges.push({
                        type: 'CALLS',
                        source: callerId,
                        target: callee.id,
                        properties: {},
                    });
                }
            }
            ts.forEachChild(n, visit);
        };
        ts.forEachChild(node, visit);
    }
    /**
     * Derives the call-graph properties of every FunctionNode from the CALLS
     * edges: callsMethods, calledByMethods, usesExternalMethods,
     * internalCallGraph and transitiveCallDepth. CALLS edges whose source or
     * target is not a known FunctionNode are ignored.
     *
     * @returns {void}
     */
    buildCallGraph() {
        const funcs = this.nodes.filter(n => n.type === 'FunctionNode');
        const funcById = new Map();
        const callMap = new Map();
        const calledByMap = new Map();
        for (const f of funcs) {
            if (!funcById.has(f.id)) {
                funcById.set(f.id, f);
            }
            callMap.set(f.id, []);
            calledByMap.set(f.id, []);
            f.properties.callsMethods = [];
            f.properties.calledByMethods = [];
            f.properties.usesExternalMethods = false;
            f.properties.internalCallGraph = [];
            f.properties.transitiveCallDepth = 0;
        }
        for (const e of this.edges) {
            if (e.type !== 'CALLS') {
                continue;
            }
            const outgoing = callMap.get(e.source);
            const incoming = calledByMap.get(e.target);
            if (!outgoing || !incoming) {
                continue;
            }
            outgoing.push(e.target);
            incoming.push(e.source);
        }
        for (const f of funcs) {
            const props = f.properties;
            props.callsMethods = callMap.get(f.id);
            props.calledByMethods = calledByMap.get(f.id);
            const callees = props.callsMethods.map(id => funcById.get(id)).filter(Boolean);
            props.usesExternalMethods = callees.some(c => c.properties.fileId !== props.fileId);
            props.internalCallGraph = callees
                .filter(c => c.properties.fileId === props.fileId)
                .map(c => c.id);
            props.transitiveCallDepth = this.computeTransitiveDepth(f.id, callMap);
        }
    }
    /**
     * Depth-first walk over the call map starting at `id`, returning the
     * largest depth reached. A node that was already visited ends the walk on
     * that branch and contributes the depth at which it was re-encountered,
     * which also terminates cycles.
     *
     * NOTE(review): the visited set is shared between sibling branches, so a
     * function first reached through a short path is not explored again when
     * it is reached through a longer one; the result can therefore be smaller
     * than the longest call chain. Confirm whether that is intended.
     *
     * @param {string} id - Id of the starting FunctionNode.
     * @param {Map<string,string[]>} callMap - Caller id -> callee ids.
     * @returns {number} The depth reached by the walk.
     */
    computeTransitiveDepth(id, callMap) {
        const visited = new Set();
        const dfs = (curr, depth) => {
            if (visited.has(curr)) {
                return depth;
            }
            visited.add(curr);
            const calls = callMap.get(curr) || [];
            return Math.max(depth, ...calls.map(c => dfs(c, depth + 1)));
        };
        return dfs(id, 0);
    }
    /**
     * Counts the collected nodes per node type.
     *
     * @returns {Object<string, number>} Node type -> number of nodes.
     */
    getNodeTypeStats() {
        const stats = {};
        this.nodes.forEach(n => (stats[n.type] = (stats[n.type] || 0) + 1));
        return stats;
    }
}
exports.AstParser = AstParser;