hikma-engine
Version:
Code Knowledge Graph Indexer - A sophisticated TypeScript-based indexer that transforms Git repositories into multi-dimensional knowledge stores for AI agents
430 lines (429 loc) • 18.3 kB
JavaScript
/**
* @file Responsible for parsing AST of supported languages and extracting code structure.
* It identifies and creates various node types (CodeNode, FileNode,
* TestNode, FunctionNode) and their relationships (edges).
* Polyglot AST parser supporting multiple languages.
*/
/**
 * Compiler-emitted interop helper: exposes property `k` of module object `m`
 * on object `o` under the name `k2` (which defaults to `k`).
 *
 * A helper already present on `this` is reused when there is one; otherwise
 * one of the two implementations below is picked depending on whether
 * `Object.create` is available.
 */
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
var desc = Object.getOwnPropertyDescriptor(m, k);
// Reuse m's own descriptor unless it is missing, is an accessor on an object
// that is not flagged __esModule, or is a writable/configurable data property.
// In those cases install an enumerable getter that reads m[k] on every access.
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
// Fallback without property descriptors: copy the current value once.
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
/**
 * Compiler-emitted interop helper: stores `value` as the `default` member of
 * `target`. Reuses a helper already present on `this` when there is one.
 * With `Object.create` available the member is defined through
 * `Object.defineProperty` (enumerable, value only); otherwise it is a plain
 * assignment.
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create
    ? function (target, value) {
        var descriptor = { enumerable: true, value: value };
        Object.defineProperty(target, "default", descriptor);
    }
    : function (target, value) {
        target["default"] = value;
    });
/**
 * Compiler-emitted interop helper used for the `require(...)` calls below.
 *
 * Given a loaded module `mod`:
 *  - if `mod` is truthy and flagged `__esModule`, it is returned unchanged;
 *  - otherwise a new object is returned that has a binding (via
 *    `__createBinding`) for every own property name of `mod` except
 *    "default", plus a `default` member set to `mod` itself (via
 *    `__setModuleDefault`).
 *
 * A helper already present on `this` is reused when there is one.
 */
var __importStar = (this && this.__importStar) || (function () {
// ownKeys replaces itself on first use with Object.getOwnPropertyNames, or
// with a for-in + hasOwnProperty fallback when that function is unavailable.
var ownKeys = function(o) {
ownKeys = Object.getOwnPropertyNames || function (o) {
var ar = [];
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
return ar;
};
return ownKeys(o);
};
return function (mod) {
if (mod && mod.__esModule) return mod;
var result = {};
// null/undefined modules contribute no named bindings, only `default`.
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
__setModuleDefault(result, mod);
return result;
};
})();
Object.defineProperty(exports, "__esModule", { value: true });
exports.AstParser = void 0;
const logger_1 = require("../utils/logger");
const error_handling_1 = require("../utils/error-handling");
const fs = __importStar(require("fs"));
const path = __importStar(require("path"));
const ts = __importStar(require("typescript"));
/**
 * Path fragments that mark a file as a test file. They are matched against
 * the lower-cased path, so no case-insensitive flag is needed. Directory
 * patterns accept both `/` and `\` separators.
 */
const TEST_FILE_PATTERNS = [
    /\.test\./, /\.spec\./, /_test\./, /_spec\./,
    /test_/, /spec_/, /tests?[\\/]/, /specs?[\\/]/,
    /__tests?__/, /__specs?__/,
];
/** Lower-cased file extension → language label. */
const LANGUAGE_BY_EXTENSION = Object.freeze({
    '.ts': 'typescript', '.tsx': 'typescript',
    '.js': 'javascript', '.jsx': 'javascript',
    '.py': 'python', '.java': 'java', '.go': 'go',
    '.c': 'c', '.cc': 'cpp', '.cxx': 'cpp', '.cpp': 'cpp', '.h': 'c', '.hpp': 'cpp',
    '.cs': 'csharp', '.rb': 'ruby', '.php': 'php',
    '.html': 'html', '.css': 'css', '.scss': 'scss', '.less': 'less',
    '.json': 'json', '.xml': 'xml', '.yaml': 'yaml', '.yml': 'yaml',
    '.md': 'markdown', '.rst': 'restructuredtext',
});
/** Lower-case substrings that classify a path as a configuration file. */
const CONFIG_PATH_MARKERS = [
    'package.json', 'tsconfig.json', 'jest.config', 'webpack.config',
    '.eslintrc', '.prettierrc', '.gitignore', '.env', 'dockerfile',
    'makefile', 'cmake', '.yml', '.yaml', '.toml', '.ini',
];
/** Lower-case substrings that classify a path as a development/build file. */
const DEV_PATH_MARKERS = ['gulpfile', 'gruntfile', 'rollup.config', 'vite.config'];
/**
 * Parses source files into graph nodes (FunctionNode, CodeNode, TestNode) and
 * edges (DEFINED_IN, CALLS). Only TypeScript/JavaScript files receive detailed
 * parsing; other languages are detected and skipped.
 */
class AstParser {
    /**
     * @param {string} projectRoot - The absolute path to the root of the project.
     * @param {ConfigManager} config - Configuration manager instance.
     * @param {string} repoId - The repository ID for foreign key relationships.
     */
    constructor(projectRoot, config, repoId) {
        this.logger = (0, logger_1.getLogger)('AstParser');
        this.nodes = [];
        this.edges = [];
        this.pathToIdMap = new Map();
        this.projectRoot = projectRoot;
        this.config = config;
        this.repoId = repoId;
        this.logger.info('AstParser initialized', { projectRoot, repoId });
    }
    /** @returns {object[]} Nodes collected by the most recent parseFiles() run. */
    getNodes() {
        return this.nodes;
    }
    /** @returns {object[]} Edges collected by the most recent parseFiles() run. */
    getEdges() {
        return this.edges;
    }
    /**
     * Parses the given files and extracts nodes and edges. Previously collected
     * nodes and edges are discarded first. A failure in a single file is logged
     * as a warning and does not abort the run.
     * @param {string[]} filePaths – files to parse
     * @param {Map<string,string>} idMap – absolute path → FileNode.id
     * @returns {Promise<void>}
     * @throws Re-throws any error raised outside the per-file parsing step.
     */
    async parseFiles(filePaths, idMap = new Map()) {
        const op = this.logger.operation('AST parsing');
        try {
            this.logger.info(`Starting AST parsing for ${filePaths.length} files`);
            /* reset state */
            this.nodes = [];
            this.edges = [];
            this.pathToIdMap = idMap;
            /* basic file/dir nodes first */
            this.createFileNodes(filePaths);
            /* per-file detailed parsing */
            for (const filePath of filePaths) {
                const lang = this.detectLanguage(filePath);
                try {
                    if (lang === 'typescript' || lang === 'javascript') {
                        await this.parseTypeScriptFile(filePath);
                    }
                    else {
                        this.logger.debug(`Skipping detailed parsing for ${lang}: ${filePath}`);
                    }
                }
                catch (err) {
                    this.logger.warn(`Failed to parse file: ${filePath}`, { error: (0, error_handling_1.getErrorMessage)(err) });
                }
            }
            /* relationships & call graph */
            this.createEdges();
            this.buildCallGraph();
            this.logger.info('AST parsing completed', {
                totalNodes: this.nodes.length,
                totalEdges: this.edges.length,
                nodeTypes: this.getNodeTypeStats(),
            });
        }
        catch (err) {
            this.logger.error('AST parsing failed', { error: (0, error_handling_1.getErrorMessage)(err) });
            throw err;
        }
        finally {
            // Close the logger operation exactly once, on success and on failure.
            op();
        }
    }
    /* ------------------------------------------------------------------ */
    /* Helpers */
    /* ------------------------------------------------------------------ */
    /**
     * Tells whether a path looks like a test file, based on its name or on a
     * test/spec directory segment (with either `/` or `\` as separator).
     * @param {string} filePath
     * @returns {boolean}
     */
    isTestFile(filePath) {
        const lowered = filePath.toLowerCase();
        return TEST_FILE_PATTERNS.some((pattern) => pattern.test(lowered));
    }
    /**
     * Maps a file's extension (case-insensitive) to a language label.
     * @param {string} filePath
     * @returns {string} The language label, or 'unknown' for unmapped extensions.
     */
    detectLanguage(filePath) {
        const ext = path.extname(filePath).toLowerCase();
        return LANGUAGE_BY_EXTENSION[ext] || 'unknown';
    }
    /**
     * Builds a node id. The four known node types get an id derived from
     * their location; any other type gets a time/random based id.
     * @param {string} type - Node type name.
     * @param {object} props - filePath, and for non-file nodes name, startLine
     *   and optionally startColumn (treated as 0 when missing).
     * @returns {string}
     */
    createNodeId(type, props) {
        const located = (prefix) => `${prefix}:${props.filePath}:${props.name}:${props.startLine}:${props.startColumn || 0}`;
        switch (type) {
            case 'FileNode': return `file:${props.filePath}`;
            case 'CodeNode': return located('code');
            case 'TestNode': return located('test');
            case 'FunctionNode': return located('func');
            default:
                return `${type.toLowerCase()}:${Date.now()}:${Math.random()}`;
        }
    }
    /* ------------------------------------------------------------------ */
    /* TypeScript / JavaScript parsing */
    /* ------------------------------------------------------------------ */
    /**
     * Reads and parses one TypeScript/JavaScript file, collecting
     * FunctionNodes (function declarations, methods, arrow functions),
     * CodeNodes (classes, interfaces) and, in test files, TestNodes for
     * function declarations and methods.
     *
     * CALLS edges are resolved only after the whole file has been visited, so
     * a call to a function declared further down in the same file is linked
     * as well.
     * @param {string} filePath
     * @returns {Promise<void>}
     */
    async parseTypeScriptFile(filePath) {
        const content = await fs.promises.readFile(filePath, 'utf-8');
        const sourceFile = ts.createSourceFile(filePath, content, ts.ScriptTarget.Latest, true);
        const inTestFile = this.isTestFile(filePath);
        /** AST node + FunctionNode pairs found in this file, in visit order. */
        const fileFunctions = [];
        const visit = (node) => {
            const isDeclaredFunction = ts.isFunctionDeclaration(node) || ts.isMethodDeclaration(node);
            // Functions (incl. arrow, methods)
            if (isDeclaredFunction || ts.isArrowFunction(node)) {
                const func = this.extractFunctionDetails(node, filePath);
                if (func) {
                    this.nodes.push(func);
                    fileFunctions.push({ node, func });
                }
            }
            // Classes, interfaces remain CodeNode
            else if (ts.isClassDeclaration(node)) {
                this.extractCodeNode(node, filePath, 'class');
            }
            else if (ts.isInterfaceDeclaration(node)) {
                this.extractCodeNode(node, filePath, 'interface');
            }
            // Test functions → TestNode
            if (inTestFile && isDeclaredFunction) {
                this.extractTestNode(node, filePath);
            }
            ts.forEachChild(node, visit);
        };
        visit(sourceFile);
        // Name → first FunctionNode with that name in this file.
        const calleesByName = new Map();
        for (const { func } of fileFunctions) {
            if (!calleesByName.has(func.properties.name)) {
                calleesByName.set(func.properties.name, func);
            }
        }
        for (const { node, func } of fileFunctions) {
            this.extractFunctionCalls(node, func.id, sourceFile, calleesByName);
        }
    }
    /* ------------------------------------------------------------------ */
    /* Extractors */
    /* ------------------------------------------------------------------ */
    /**
     * Builds a FunctionNode for a function declaration, method declaration or
     * arrow function. Arrow functions are always named 'anonymous'.
     *
     * The signature is the source text from the start of the node up to its
     * body (or up to the `=>` token for arrow functions), so braces or arrows
     * inside the parameter list do not truncate it.
     * @param {ts.Node} node
     * @param {string} filePath
     * @returns {object|null} The FunctionNode, or null for any other node kind.
     */
    extractFunctionDetails(node, filePath) {
        const isDeclaredFunction = ts.isFunctionDeclaration(node) || ts.isMethodDeclaration(node);
        if (!isDeclaredFunction && !ts.isArrowFunction(node)) {
            return null;
        }
        const sf = node.getSourceFile();
        const nodeStart = node.getStart();
        const start = sf.getLineAndCharacterOfPosition(nodeStart);
        const end = sf.getLineAndCharacterOfPosition(node.getEnd());
        const body = node.getText();
        let name = 'anonymous';
        let accessLevel = 'public';
        let headerLength;
        if (isDeclaredFunction) {
            name = node.name?.getText() || 'anonymous';
            // Overloads and abstract methods have no body: keep the full text.
            headerLength = node.body ? node.body.getStart() - nodeStart : body.length;
            for (const modifier of node.modifiers ?? []) {
                if (modifier.kind === ts.SyntaxKind.PrivateKeyword) {
                    accessLevel = 'private';
                }
                else if (modifier.kind === ts.SyntaxKind.ProtectedKeyword) {
                    accessLevel = 'protected';
                }
            }
        }
        else {
            headerLength = node.equalsGreaterThanToken.getStart() - nodeStart;
        }
        const signature = body.slice(0, headerLength).trim();
        const returnType = node.type ? node.type.getText() : 'any';
        const startLine = start.line + 1;
        return {
            id: this.createNodeId('FunctionNode', { filePath, name, startLine, startColumn: start.character }),
            type: 'FunctionNode',
            properties: {
                name,
                signature,
                returnType,
                accessLevel,
                filePath,
                fileId: this.pathToIdMap.get(filePath) || '',
                startLine,
                endLine: end.line + 1,
                body,
                // Call-graph fields are filled in by buildCallGraph().
                callsMethods: [],
                calledByMethods: [],
                usesExternalMethods: false,
                internalCallGraph: [],
                transitiveCallDepth: 0,
            },
        };
    }
    /**
     * Creates a CodeNode for a class or interface declaration and appends it
     * to the collected nodes. Other node kinds yield an 'anonymous' node with
     * an empty signature.
     * @param {ts.Node} node
     * @param {string} filePath
     * @param {string} nodeType - 'class' or 'interface'; currently not read,
     *   the kind is derived from the node itself.
     */
    extractCodeNode(node, filePath, nodeType) {
        const sf = node.getSourceFile();
        const start = sf.getLineAndCharacterOfPosition(node.getStart());
        const end = sf.getLineAndCharacterOfPosition(node.getEnd());
        let name = 'anonymous';
        let signature = '';
        if (ts.isClassDeclaration(node)) {
            name = node.name?.getText() || 'anonymous';
            signature = `class ${name}`;
        }
        else if (ts.isInterfaceDeclaration(node)) {
            name = node.name?.getText() || 'anonymous';
            signature = `interface ${name}`;
        }
        const startLine = start.line + 1;
        this.nodes.push({
            id: this.createNodeId('CodeNode', { filePath, name, startLine, startColumn: start.character }),
            type: 'CodeNode',
            properties: {
                name,
                signature,
                body: node.getText(),
                language: this.detectLanguage(filePath),
                filePath,
                startLine,
                endLine: end.line + 1,
            },
        });
    }
    /**
     * Creates a TestNode for a function or method found in a test file and
     * appends it to the collected nodes.
     * @param {ts.Node} node
     * @param {string} filePath
     */
    extractTestNode(node, filePath) {
        const sf = node.getSourceFile();
        const start = sf.getLineAndCharacterOfPosition(node.getStart());
        const end = sf.getLineAndCharacterOfPosition(node.getEnd());
        const isDeclaredFunction = ts.isFunctionDeclaration(node) || ts.isMethodDeclaration(node);
        const name = (isDeclaredFunction && node.name?.getText()) || 'anonymous test';
        const startLine = start.line + 1;
        this.nodes.push({
            id: this.createNodeId('TestNode', { filePath, name, startLine, startColumn: start.character }),
            type: 'TestNode',
            properties: {
                name,
                filePath,
                startLine,
                endLine: end.line + 1,
                framework: this.detectTestFramework(filePath),
                testBody: node.getText(),
            },
        });
    }
    /**
     * Guesses the test framework from the file's base name only.
     * @param {string} filePath
     * @returns {string} 'jest', 'mocha', 'jasmine', 'vitest' or 'unknown'.
     */
    detectTestFramework(filePath) {
        const baseName = path.basename(filePath).toLowerCase();
        const frameworks = ['jest', 'mocha', 'jasmine', 'vitest'];
        return frameworks.find((framework) => baseName.includes(framework)) || 'unknown';
    }
    /**
     * Classifies a path. Checks run in this order and the first hit wins:
     * test, vendor, config, dev, then 'source' as the default.
     * @param {string} filePath
     * @returns {string}
     */
    determineFileType(filePath) {
        const lowerPath = filePath.toLowerCase();
        const containsAny = (markers) => markers.some((marker) => lowerPath.includes(marker));
        if (this.isTestFile(filePath)) {
            return 'test';
        }
        if (containsAny(['node_modules', 'vendor', 'third_party'])) {
            return 'vendor';
        }
        if (containsAny(CONFIG_PATH_MARKERS)) {
            return 'config';
        }
        if (containsAny(DEV_PATH_MARKERS)) {
            return 'dev';
        }
        return 'source';
    }
    /**
     * Produces a short hexadecimal hash of the PATH string (not of the file's
     * content) using a 32-bit rolling hash.
     * @param {string} filePath
     * @returns {string}
     */
    generateContentHash(filePath) {
        let hash = 0;
        for (let i = 0; i < filePath.length; i++) {
            // `| 0` keeps the running value within 32-bit integer range.
            hash = (((hash << 5) - hash) + filePath.charCodeAt(i)) | 0;
        }
        return Math.abs(hash).toString(16);
    }
    /* ------------------------------------------------------------------ */
    /* File nodes only - directories removed */
    /* ------------------------------------------------------------------ */
    /**
     * Intentionally a no-op: FileNode creation is handled by NodeCreator in
     * the indexing pipeline. Kept for compatibility.
     * @param {string[]} filePaths
     */
    createFileNodes(filePaths) {
    }
    /**
     * Adds a DEFINED_IN edge from every CodeNode/TestNode to the first
     * FileNode among the collected nodes whose path, resolved against the
     * project root, equals the node's filePath. Nodes without a matching
     * FileNode get no edge.
     */
    createEdges() {
        // Resolved path → first FileNode with that path.
        const fileByPath = new Map();
        for (const n of this.nodes) {
            if (n.type !== 'FileNode') {
                continue;
            }
            const resolved = path.resolve(this.projectRoot, n.properties.filePath);
            if (!fileByPath.has(resolved)) {
                fileByPath.set(resolved, n);
            }
        }
        const codes = this.nodes.filter((n) => n.type === 'CodeNode');
        const tests = this.nodes.filter((n) => n.type === 'TestNode');
        // Code/Test -> File
        for (const n of [...codes, ...tests]) {
            const file = fileByPath.get(n.properties.filePath);
            if (file) {
                this.edges.push({ source: n.id, target: file.id, type: 'DEFINED_IN' });
            }
        }
    }
    /* ------------------------------------------------------------------ */
    /* Call-graph helpers */
    /* ------------------------------------------------------------------ */
    /**
     * Walks the subtree below `node` and adds a CALLS edge from `callerId` for
     * every call expression whose callee text equals the name of a known
     * function in the same file.
     * @param {ts.Node} node - Function node whose body is scanned.
     * @param {string} callerId - Id of the calling FunctionNode.
     * @param {ts.SourceFile} sf - Source file the node belongs to.
     * @param {Map<string,object>} [calleesByName] - Optional name → FunctionNode
     *   index for this file. Without it, the collected nodes are searched for
     *   a FunctionNode with a matching name whose filePath equals sf.fileName.
     */
    extractFunctionCalls(node, callerId, sf, calleesByName) {
        const findCallee = calleesByName
            ? (calleeName) => calleesByName.get(calleeName)
            : (calleeName) => this.nodes.find((x) => x.type === 'FunctionNode' &&
                x.properties.name === calleeName &&
                x.properties.filePath === sf.fileName);
        const visit = (n) => {
            if (ts.isCallExpression(n)) {
                const callee = findCallee(n.expression.getText());
                if (callee) {
                    this.edges.push({
                        type: 'CALLS',
                        source: callerId,
                        target: callee.id,
                        properties: {},
                    });
                }
            }
            ts.forEachChild(n, visit);
        };
        ts.forEachChild(node, visit);
    }
    /**
     * Fills the call-graph properties of every FunctionNode from the collected
     * CALLS edges: callsMethods, calledByMethods, usesExternalMethods,
     * internalCallGraph and transitiveCallDepth. An edge endpoint that is not
     * a known FunctionNode is ignored on that side instead of throwing.
     */
    buildCallGraph() {
        const funcs = this.nodes.filter((n) => n.type === 'FunctionNode');
        const funcById = new Map();
        const callMap = new Map();
        const calledByMap = new Map();
        for (const f of funcs) {
            // The first node wins when two nodes share an id.
            if (!funcById.has(f.id)) {
                funcById.set(f.id, f);
            }
            callMap.set(f.id, []);
            calledByMap.set(f.id, []);
        }
        for (const e of this.edges) {
            if (e.type !== 'CALLS') {
                continue;
            }
            callMap.get(e.source)?.push(e.target);
            calledByMap.get(e.target)?.push(e.source);
        }
        for (const f of funcs) {
            const props = f.properties;
            props.callsMethods = callMap.get(f.id);
            props.calledByMethods = calledByMap.get(f.id);
            const callees = props.callsMethods.map((id) => funcById.get(id)).filter(Boolean);
            props.usesExternalMethods = callees.some((c) => c.properties.fileId !== props.fileId);
            props.internalCallGraph = callees
                .filter((c) => c.properties.fileId === props.fileId)
                .map((c) => c.id);
            props.transitiveCallDepth = this.computeTransitiveDepth(f.id, callMap);
        }
    }
    /**
     * Length (in edges) of the longest call chain starting at `id`.
     *
     * Only nodes on the path currently being explored are treated as cycle
     * ends, so the result does not depend on the order of sibling calls. An
     * edge that closes a cycle still counts as one step, so a directly
     * recursive function has depth 1.
     * @param {string} id - Id of the starting function.
     * @param {Map<string,string[]>} callMap - caller id → callee ids.
     * @returns {number} 0 for a function without calls or one missing from callMap.
     */
    computeTransitiveDepth(id, callMap) {
        const onPath = new Set();
        const knownHeights = new Map();
        const heightOf = (curr) => {
            if (onPath.has(curr)) {
                return 0;
            }
            const known = knownHeights.get(curr);
            if (known !== undefined) {
                return known;
            }
            onPath.add(curr);
            let height = 0;
            for (const callee of callMap.get(curr) || []) {
                height = Math.max(height, 1 + heightOf(callee));
            }
            onPath.delete(curr);
            knownHeights.set(curr, height);
            return height;
        };
        return heightOf(id);
    }
    /**
     * Counts the collected nodes per node type.
     * @returns {Object<string, number>}
     */
    getNodeTypeStats() {
        const stats = {};
        for (const n of this.nodes) {
            stats[n.type] = (stats[n.type] || 0) + 1;
        }
        return stats;
    }
}
exports.AstParser = AstParser;
;