UNPKG

hikma-engine

Version:

Code Knowledge Graph Indexer - A sophisticated TypeScript-based indexer that transforms Git repositories into multi-dimensional knowledge stores for AI agents

445 lines (444 loc) 18.2 kB
"use strict";
// --- TypeScript-emitted CommonJS interop helpers (kept functionally unchanged) ---
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
  if (k2 === undefined) k2 = k;
  var desc = Object.getOwnPropertyDescriptor(m, k);
  if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
    desc = { enumerable: true, get: function() { return m[k]; } };
  }
  Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
  if (k2 === undefined) k2 = k;
  o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
  Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
  o["default"] = v;
});
var __importStar = (this && this.__importStar) || (function () {
  var ownKeys = function(o) {
    ownKeys = Object.getOwnPropertyNames || function (o) {
      var ar = [];
      for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
      return ar;
    };
    return ownKeys(o);
  };
  return function (mod) {
    if (mod && mod.__esModule) return mod;
    var result = {};
    if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
    __setModuleDefault(result, mod);
    return result;
  };
})();
Object.defineProperty(exports, "__esModule", { value: true });
exports.PhaseManager = void 0;
const logger_1 = require("../utils/logger");
const PhaseRepository_1 = require("../persistence/PhaseRepository");
const connection_1 = require("../persistence/db/connection");
const schema_1 = require("../persistence/db/schema");
const indexing_1 = require("./indexing");
const models_1 = require("../persistence/models");
const path = __importStar(require("path"));
const crypto_1 = require("crypto");

/**
 * Manages the phase-by-phase execution of the indexing pipeline.
 *
 * Phases (see getPhaseNameByNumber):
 *   1. data_discovery        2. structure_extraction
 *   3. ai_enrichment         4. final_assembly
 *
 * Call initialize() before executePhases(); call cleanup() when done.
 */
class PhaseManager {
  /**
   * @param {string} projectRoot - Root path of the project being indexed.
   * @param {object} config - Configuration object; must expose getDatabaseConfig().
   */
  constructor(projectRoot, config) {
    this.errors = [];
    this.projectRoot = projectRoot;
    this.config = config;
    this.logger = (0, logger_1.getLogger)('PhaseManager');
  }

  /**
   * Opens the SQLite connection, initializes the tables and creates the
   * phase repository used by every other method.
   *
   * @returns {Promise<void>}
   */
  async initialize() {
    // Initialize database connection
    const dbConfig = this.config.getDatabaseConfig();
    this.sqliteClient = new connection_1.SQLiteClient(dbConfig.sqlite.path);
    await this.sqliteClient.connect();
    // Initialize schema
    (0, schema_1.initializeTables)(this.sqliteClient);
    // Initialize phase repository
    this.phaseRepo = new PhaseRepository_1.PhaseRepository(this.sqliteClient.getDb());
    this.logger.info('PhaseManager initialized successfully');
  }

  /**
   * Runs the requested phases in order and returns an aggregated result.
   *
   * Options read here and in the helpers it calls: showStatus, inspectPhase,
   * runPhases, fromPhase, forcePhases, dryRun, skipAISummary, skipEmbeddings.
   *
   * @param {object} [options={}] - Execution options.
   * @returns {Promise<object>} Totals, per-phase results and collected errors.
   *   When showStatus or inspectPhase is set, an empty result is returned
   *   after printing.
   * @throws Re-throws any error raised while executing a phase (after logging).
   */
  async executePhases(options = {}) {
    const startTime = Date.now();
    const phases = [];
    try {
      // Handle special commands first
      if (options.showStatus) {
        await this.showPhaseStatus();
        return this.createEmptyResult();
      }
      if (options.inspectPhase) {
        await this.inspectPhase(options.inspectPhase);
        return this.createEmptyResult();
      }

      // Determine indexing strategy
      const strategy = new indexing_1.IndexingStrategy(this.projectRoot, this.config, this.logger);
      const indexingStrategy = await strategy.determine(false); // TODO: Add force option
      const repoId = this.getRepoId();

      // Ensure repository record exists before any phase operations
      await this.ensureRepositoryExists(repoId);

      // Execute phases based on options
      const phasesToRun = this.determinePhasesToRun(options);
      let phase1Data = null;
      let phase2Data = null;
      let phase3Data = null;
      for (const phaseNum of phasesToRun) {
        const phaseResult = await this.executePhase(phaseNum, {
          repoId,
          indexingStrategy,
          phase1Data,
          phase2Data,
          phase3Data,
          options,
        });
        phases.push(phaseResult);
        // Store phase data for next phases
        if (phaseNum === 1) phase1Data = phaseResult.data;
        if (phaseNum === 2) phase2Data = phaseResult.data;
        if (phaseNum === 3) phase3Data = phaseResult.data;
      }

      // Calculate totals from the final phase data
      const finalPhaseData = phases.find((p) => p.phase === 4)?.data;
      return {
        totalNodes: finalPhaseData?.finalNodes?.length ?? 0,
        totalEdges: finalPhaseData?.finalEdges?.length ?? 0,
        processedFiles: phase1Data?.files?.length ?? 0,
        isIncremental: indexingStrategy.isIncremental,
        duration: Date.now() - startTime,
        phases,
        errors: [...this.errors],
      };
    } catch (error) {
      this.logger.error('Phase execution failed', { error });
      throw error;
    }
  }

  /**
   * Executes a single phase, or loads its data from the database when the
   * phase is already complete and not listed in options.forcePhases.
   *
   * @param {number} phaseNum - Phase number.
   * @param {object} context - { repoId, indexingStrategy, phase1Data,
   *   phase2Data, phase3Data, options }.
   * @returns {Promise<object>} { phase, name, duration, itemsProcessed, fromCache, data }.
   * @throws Re-throws the original phase error after recording the failure.
   */
  async executePhase(phaseNum, context) {
    const startTime = Date.now();
    const phaseName = this.getPhaseNameByNumber(phaseNum);
    this.logger.info(`=== Phase ${phaseNum}: ${phaseName} ===`);
    try {
      // Check if phase should be skipped
      const shouldForce = context.options.forcePhases?.includes(phaseNum);
      const isComplete = await this.phaseRepo.isPhaseComplete(context.repoId, phaseName);
      if (isComplete && !shouldForce) {
        this.logger.info(`Phase ${phaseNum} already complete, loading from database`);
        // Note: loadPhaseData only knows phases 1 and 2; others yield null.
        const cached = await this.loadPhaseData(phaseNum, context.repoId);
        return {
          phase: phaseNum,
          name: phaseName,
          duration: Date.now() - startTime,
          itemsProcessed: this.getItemCount(cached),
          fromCache: true,
          data: cached,
        };
      }

      // Mark phase as started
      // NOTE(review): in dry-run mode the phase is marked started but never
      // marked completed below — confirm this is the intended bookkeeping.
      await this.phaseRepo.markPhaseStarted(context.repoId, phaseName, context.indexingStrategy.currentCommitHash);

      // Execute the actual phase
      const data = await this.executePhaseLogic(phaseNum, context);
      const itemsProcessed = this.getItemCount(data);

      // Persist phase data
      if (!context.options.dryRun) {
        await this.persistPhaseData(phaseNum, context.repoId, data);
        await this.phaseRepo.markPhaseCompleted(context.repoId, phaseName, { itemsProcessed });
      }

      this.logger.info(`Phase ${phaseNum} completed successfully`, {
        itemsProcessed,
        duration: Date.now() - startTime,
      });
      return {
        phase: phaseNum,
        name: phaseName,
        duration: Date.now() - startTime,
        itemsProcessed,
        fromCache: false,
        data,
      };
    } catch (error) {
      await this.recordPhaseFailure(context.repoId, phaseName, error);
      throw error;
    }
  }

  /**
   * Records a phase failure without ever throwing, so that the caller can
   * re-throw the original phase error instead of a bookkeeping error.
   *
   * @param {string} repoId - Repository identifier.
   * @param {string} phaseName - Name of the failed phase.
   * @param {*} error - The value that was thrown by the phase.
   * @returns {Promise<void>}
   */
  async recordPhaseFailure(repoId, phaseName, error) {
    // Thrown values are not guaranteed to be Error instances (or even objects).
    const message = error?.message ?? String(error);
    try {
      await this.phaseRepo.markPhaseFailed(repoId, phaseName, message);
    } catch (markError) {
      this.logger.warn('Failed to record phase failure', {
        phase: phaseName,
        error: markError?.message ?? String(markError),
      });
    }
  }

  /**
   * Dispatches to the implementation of the given phase.
   *
   * @param {number} phaseNum - Phase number (1-4).
   * @param {object} context - Phase execution context.
   * @returns {Promise<object>} The data produced by the phase.
   * @throws {Error} When phaseNum is not 1, 2, 3 or 4.
   */
  async executePhaseLogic(phaseNum, context) {
    switch (phaseNum) {
      case 1:
        return await this.executePhase1(context);
      case 2:
        return await this.executePhase2(context);
      case 3:
        return await this.executePhase3(context);
      case 4:
        return await this.executePhase4(context);
      default:
        throw new Error(`Unknown phase: ${phaseNum}`);
    }
  }

  /**
   * Returns the data an earlier phase left in the context, or throws a
   * descriptive error when it is missing (e.g. when only a subset of phases
   * was requested and the earlier phase did not run).
   *
   * @param {object} context - Phase execution context.
   * @param {number} requiredPhase - Phase whose data is needed.
   * @param {number} currentPhase - Phase that needs the data.
   * @returns {object} The earlier phase's data.
   * @throws {Error} When the data is null/undefined.
   */
  requirePhaseData(context, requiredPhase, currentPhase) {
    const data = context[`phase${requiredPhase}Data`];
    if (data == null) {
      throw new Error(`Phase ${currentPhase} requires data from phase ${requiredPhase}, but none is available`);
    }
    return data;
  }

  /**
   * Phase 1: Data Discovery. Discovers files and builds repository/file DTOs.
   *
   * @param {object} context - Phase execution context.
   * @returns {Promise<object>} { repository, files, originalFileMetadata }.
   */
  async executePhase1(context) {
    const fileDiscovery = new indexing_1.FileDiscovery(this.projectRoot, this.config);
    const filesToProcess = await fileDiscovery.discoverFiles(context.indexingStrategy.changedFiles);

    const nodeCreator = new indexing_1.NodeCreator();
    const repoNode = nodeCreator.createRepositoryNode(this.projectRoot);
    const fileNodes = nodeCreator.createFileNodes(filesToProcess, context.repoId); // Use consistent repo ID

    // Convert to DTOs
    const repositoryDTO = new models_1.RepositoryDTO(
      context.repoId, // Use consistent repo ID
      repoNode.properties.repoPath,
      repoNode.properties.repoName,
    );
    const fileDTOs = fileNodes.map((node) => new models_1.FileDTO(
      node.id,
      node.properties.repoId,
      node.properties.filePath,
      node.properties.fileName,
      {
        file_extension: node.properties.fileExtension,
        language: node.properties.language,
        size_kb: node.properties.sizeKb,
        content_hash: node.properties.contentHash,
        file_type: node.properties.fileType,
      },
    ));

    return {
      repository: repositoryDTO,
      files: fileDTOs,
      originalFileMetadata: filesToProcess,
    };
  }

  /**
   * Phase 2: AST Parsing and Structure Extraction.
   *
   * @param {object} context - Phase execution context; needs phase1Data.
   * @returns {Promise<object>} { astNodes, astEdges }.
   * @throws {Error} When phase 1 data is not available.
   */
  async executePhase2(context) {
    const phase1Data = this.requirePhaseData(context, 1, 2);
    const astExtractor = new indexing_1.AstExtractor(this.projectRoot, this.config, phase1Data.repository.id);

    // Map each file path to the ID of its file record.
    const pathToIdMap = new Map();
    for (const file of phase1Data.files) {
      pathToIdMap.set(file.file_path, file.id);
    }

    const { nodes: astNodes, edges: astEdges } = await astExtractor.extract(phase1Data.originalFileMetadata, pathToIdMap);
    return { astNodes, astEdges };
  }

  /**
   * Phase 3: AI Summary Generation.
   *
   * For nodes of type 'FunctionNode' the callsMethods, calledByMethods and
   * internalCallGraph arrays are replaced by their lengths in the result.
   *
   * @param {object} context - Phase execution context; needs phase1Data and
   *   phase2Data unless options.skipAISummary is set.
   * @returns {Promise<object>} { summaries, summariesCount }, or
   *   { summaries: [] } when summaries are skipped.
   * @throws {Error} When phase 1 or phase 2 data is not available.
   */
  async executePhase3(context) {
    if (context.options.skipAISummary) {
      return { summaries: [] };
    }
    const phase1Data = this.requirePhaseData(context, 1, 3);
    const phase2Data = this.requirePhaseData(context, 2, 3);

    const summaryExtractor = new indexing_1.SummaryExtractor(this.config);
    const nodesToSummarize = [
      ...phase1Data.files,
      ...phase2Data.astNodes,
    ];
    const nodesWithSummaries = await summaryExtractor.extract(nodesToSummarize);

    // Sanitize the result to avoid circular references in logging
    const sanitizedSummaries = nodesWithSummaries.map((node) => {
      if (node.type !== 'FunctionNode') {
        return node;
      }
      // Remove potentially circular reference arrays, keeping only their sizes
      const { callsMethods, calledByMethods, internalCallGraph, ...safeProperties } = node.properties;
      return {
        ...node,
        properties: {
          ...safeProperties,
          callsMethodsCount: callsMethods?.length || 0,
          calledByMethodsCount: calledByMethods?.length || 0,
          internalCallGraphCount: internalCallGraph?.length || 0,
        },
      };
    });

    return {
      summaries: sanitizedSummaries,
      summariesCount: nodesWithSummaries.length,
    };
  }

  /**
   * Phase 4: Final Assembly and Vector Embeddings.
   *
   * Collects phase 2 nodes/edges, swaps in the phase 3 version of any node
   * with a matching id, then (unless options.skipEmbeddings) runs the
   * embedding extractor over the assembled nodes.
   *
   * @param {object} context - Phase execution context.
   * @returns {Promise<object>} { finalNodes, finalEdges }.
   */
  async executePhase4(context) {
    // Aggregate all nodes and edges from previous phases
    const allNodes = [];
    const allEdges = [];

    // Add AST nodes and edges from phase 2
    if (context.phase2Data?.astNodes) {
      allNodes.push(...context.phase2Data.astNodes);
    }
    if (context.phase2Data?.astEdges) {
      allEdges.push(...context.phase2Data.astEdges);
    }

    // Replace existing nodes with their enriched phase 3 counterparts
    if (context.phase3Data?.summaries) {
      const summaryMap = new Map();
      context.phase3Data.summaries.forEach((node) => {
        summaryMap.set(node.id, node);
      });
      // Each summary is consumed at most once: only the first node with a
      // given id is replaced. Summaries without a matching node are not added.
      for (let i = 0; i < allNodes.length; i++) {
        const existingNode = allNodes[i];
        if (summaryMap.has(existingNode.id)) {
          allNodes[i] = summaryMap.get(existingNode.id);
          summaryMap.delete(existingNode.id);
        }
      }
    }

    // Phase 4 Part 2: Vector Embeddings
    if (context.options.skipEmbeddings) {
      this.logger.info('Skipping vector embedding generation.');
      return {
        finalNodes: allNodes,
        finalEdges: allEdges,
      };
    }

    this.logger.info('Starting vector embedding generation...');
    // Log nodes that carry no type (diagnostic only)
    const tempNodes = allNodes.filter((node) => node.type === undefined);
    this.logger.info(`filtered nodes: ${tempNodes.length}`, tempNodes);

    const embeddingExtractor = new indexing_1.EmbeddingExtractor(this.config);
    const nodesWithEmbeddings = await embeddingExtractor.extract(allNodes);
    this.logger.info(`Phase 4 final assembly and embedding completed`, {
      totalNodes: nodesWithEmbeddings.length,
      totalEdges: allEdges.length,
    });
    return {
      finalNodes: nodesWithEmbeddings,
      finalEdges: allEdges,
    };
  }

  /**
   * Persists the data of a phase through the phase repository.
   * Phase 3 (and any unknown phase) is currently not persisted.
   *
   * @param {number} phaseNum - Phase number.
   * @param {string} repoId - Repository identifier.
   * @param {object} data - Data produced by the phase.
   * @returns {Promise<void>}
   */
  async persistPhaseData(phaseNum, repoId, data) {
    switch (phaseNum) {
      case 1:
        await this.phaseRepo.persistPhase1Data(data);
        break;
      case 2:
        await this.phaseRepo.persistPhase2Data({
          repoId,
          astNodes: data.astNodes ?? [],
          astEdges: data.astEdges ?? [],
        });
        break;
      case 4:
        await this.phaseRepo.persistPhase4Data({
          repoId,
          finalNodes: data.finalNodes ?? [],
          finalEdges: data.finalEdges ?? [],
        });
        break;
      // TODO: Add phase 3
    }
  }

  /**
   * Loads previously persisted phase data.
   *
   * @param {number} phaseNum - Phase number.
   * @param {string} repoId - Repository identifier.
   * @returns {Promise<object|null>} Stored data for phases 1 and 2; null otherwise.
   */
  async loadPhaseData(phaseNum, repoId) {
    switch (phaseNum) {
      case 1:
        return await this.phaseRepo.loadPhase1Data(repoId);
      case 2:
        return await this.phaseRepo.loadPhase2Data(repoId);
      // TODO: Add other phases
      default:
        return null;
    }
  }

  /**
   * Resolves which phases to run: options.runPhases verbatim if given, else
   * options.fromPhase through phase 4, else all four phases.
   *
   * @param {object} options - Execution options.
   * @returns {number[]} Phase numbers in execution order.
   */
  determinePhasesToRun(options) {
    if (options.runPhases) {
      return options.runPhases;
    }
    if (options.fromPhase) {
      const first = options.fromPhase;
      return [first, first + 1, first + 2, first + 3].filter((p) => p <= 4);
    }
    return [1, 2, 3, 4]; // All phases by default
  }

  /**
   * Maps a phase number to its stored name; unknown numbers map to `phase_<n>`.
   *
   * @param {number} phaseNum - Phase number.
   * @returns {string} Phase name.
   */
  getPhaseNameByNumber(phaseNum) {
    const names = {
      1: 'data_discovery',
      2: 'structure_extraction',
      3: 'ai_enrichment',
      4: 'final_assembly',
    };
    return names[phaseNum] ?? `phase_${phaseNum}`;
  }

  /**
   * Counts the primary items in a phase's data: files (phase 1), astNodes
   * (phase 2), summaries (phase 3) or finalNodes (phase 4).
   *
   * @param {object|null|undefined} data - Phase data.
   * @returns {number} Item count, or 0 when nothing recognizable is present.
   */
  getItemCount(data) {
    if (!data) return 0;
    if (data.files) return data.files.length;
    if (data.astNodes) return data.astNodes.length;
    if (data.summaries) return data.summaries.length;
    // Phase 4 data only carries finalNodes/finalEdges.
    if (data.finalNodes) return data.finalNodes.length;
    return 0;
  }

  /**
   * Derives a repo ID from the resolved project path: the first 16 hex
   * characters of its SHA-256 digest.
   *
   * @returns {string} 16-character hexadecimal identifier.
   */
  getRepoId() {
    const normalizedPath = path.resolve(this.projectRoot);
    return crypto_1.createHash('sha256').update(normalizedPath).digest('hex').substring(0, 16);
  }

  /**
   * Best-effort creation of the repository record. Failures are logged as
   * warnings and never thrown.
   *
   * @param {string} repoId - Repository identifier.
   * @returns {Promise<void>}
   */
  async ensureRepositoryExists(repoId) {
    try {
      // Check if repository already exists; tolerate a null/undefined load result
      const existingRepo = await this.phaseRepo.loadPhase1Data(repoId);
      if (existingRepo?.repository) {
        this.logger.debug('Repository record already exists', { repoId });
        return;
      }

      // Create repository record
      const nodeCreator = new indexing_1.NodeCreator();
      const repoNode = nodeCreator.createRepositoryNode(this.projectRoot);
      const repositoryDTO = new models_1.RepositoryDTO(
        repoId, // Use consistent repo ID
        repoNode.properties.repoPath,
        repoNode.properties.repoName,
      );

      // Persist repository record
      await this.phaseRepo.persistPhase1Data({
        repository: repositoryDTO,
        files: [],
      });
      this.logger.debug('Repository record created', { repoId, repoPath: repositoryDTO.repo_path });
    } catch (error) {
      this.logger.warn('Failed to ensure repository exists, will create during Phase 1', {
        error: error?.message ?? String(error),
      });
      // Don't throw - Phase 1 will handle repository creation
    }
  }

  /**
   * Prints a table of the stored phase statuses for this repository.
   *
   * @returns {Promise<void>}
   */
  async showPhaseStatus() {
    const repoId = this.getRepoId();
    const statuses = await this.phaseRepo.getPhaseStatuses(repoId);
    console.log('\n=== Phase Status ===');
    console.table(statuses.map((s) => ({
      Phase: s.phase_name,
      Status: s.status,
      'Started At': s.started_at,
      'Completed At': s.completed_at,
      'Commit Hash': s.commit_hash?.substring(0, 8),
    })));
  }

  /**
   * Prints the stored data of a phase as pretty-printed JSON.
   *
   * @param {number} phaseNum - Phase number to inspect.
   * @returns {Promise<void>}
   */
  async inspectPhase(phaseNum) {
    const repoId = this.getRepoId();
    const data = await this.loadPhaseData(phaseNum, repoId);
    console.log(`\n=== Phase ${phaseNum} Data ===`);
    console.log(JSON.stringify(data, null, 2));
  }

  /**
   * Builds the zeroed result returned by the status/inspect commands.
   *
   * @returns {object} Result with zero totals and empty phases/errors.
   */
  createEmptyResult() {
    return {
      totalNodes: 0,
      totalEdges: 0,
      processedFiles: 0,
      isIncremental: false,
      duration: 0,
      phases: [],
      errors: [],
    };
  }

  /**
   * Disconnects the SQLite client if one was created.
   *
   * @returns {Promise<void>}
   */
  async cleanup() {
    if (this.sqliteClient) {
      // Awaiting is harmless for a synchronous disconnect and prevents a
      // floating promise if it is asynchronous.
      await this.sqliteClient.disconnect();
    }
  }
}
exports.PhaseManager = PhaseManager;