hikma-engine
Code Knowledge Graph Indexer - A sophisticated TypeScript-based indexer that transforms Git repositories into multi-dimensional knowledge stores for AI agents
"use strict";
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
var desc = Object.getOwnPropertyDescriptor(m, k);
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
o["default"] = v;
});
var __importStar = (this && this.__importStar) || (function () {
var ownKeys = function(o) {
ownKeys = Object.getOwnPropertyNames || function (o) {
var ar = [];
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
return ar;
};
return ownKeys(o);
};
return function (mod) {
if (mod && mod.__esModule) return mod;
var result = {};
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
__setModuleDefault(result, mod);
return result;
};
})();
Object.defineProperty(exports, "__esModule", { value: true });
exports.PhaseManager = void 0;
const logger_1 = require("../utils/logger");
const PhaseRepository_1 = require("../persistence/PhaseRepository");
const connection_1 = require("../persistence/db/connection");
const schema_1 = require("../persistence/db/schema");
const indexing_1 = require("./indexing");
const models_1 = require("../persistence/models");
const path = __importStar(require("path"));
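/*
 * Usage sketch (illustrative, not from the package docs): the option flags
 * shown are the ones this class actually checks (showStatus, inspectPhase,
 * runPhases, fromPhase, forcePhases, dryRun, skipAISummary, skipEmbeddings);
 * `config` is assumed to be the hikma-engine config object that exposes
 * getDatabaseConfig().
 *
 *   const manager = new PhaseManager('/path/to/repo', config);
 *   await manager.initialize();
 *   try {
 *       const result = await manager.executePhases({ skipEmbeddings: true });
 *       console.log(`${result.totalNodes} nodes, ${result.totalEdges} edges`);
 *   } finally {
 *       await manager.cleanup();
 *   }
 */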
/**
* Manages the phase-by-phase execution of the indexing pipeline
*/
class PhaseManager {
constructor(projectRoot, config) {
this.errors = [];
this.projectRoot = projectRoot;
this.config = config;
this.logger = (0, logger_1.getLogger)('PhaseManager');
}
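    /**
     * Opens the SQLite connection at the configured database path, creates
     * the schema tables, and constructs the PhaseRepository used to track
     * per-phase progress.
     */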
async initialize() {
// Initialize database connection
const dbConfig = this.config.getDatabaseConfig();
this.sqliteClient = new connection_1.SQLiteClient(dbConfig.sqlite.path);
await this.sqliteClient.connect();
// Initialize schema
(0, schema_1.initializeTables)(this.sqliteClient);
// Initialize phase repository
this.phaseRepo = new PhaseRepository_1.PhaseRepository(this.sqliteClient.getDb());
this.logger.info('PhaseManager initialized successfully');
}
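    /**
     * Runs the requested phases in order and returns aggregate totals.
     * Handles the status/inspect commands up front, resolves the indexing
     * strategy (full vs. incremental), and threads each phase's output into
     * the phases that follow.
     */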
async executePhases(options = {}) {
const startTime = Date.now();
const phases = [];
try {
// Handle special commands first
if (options.showStatus) {
await this.showPhaseStatus();
return this.createEmptyResult();
}
if (options.inspectPhase) {
await this.inspectPhase(options.inspectPhase);
return this.createEmptyResult();
}
// Determine indexing strategy
const strategy = new indexing_1.IndexingStrategy(this.projectRoot, this.config, this.logger);
const indexingStrategy = await strategy.determine(false); // TODO: Add force option
const repoId = this.getRepoId();
// Ensure repository record exists before any phase operations
await this.ensureRepositoryExists(repoId);
// Execute phases based on options
const phasesToRun = this.determinePhasesToRun(options);
let phase1Data = null;
let phase2Data = null;
let phase3Data = null;
for (const phaseNum of phasesToRun) {
const phaseResult = await this.executePhase(phaseNum, {
repoId,
indexingStrategy,
phase1Data,
phase2Data,
phase3Data,
options,
});
phases.push(phaseResult);
// Store phase data for next phases
if (phaseNum === 1)
phase1Data = phaseResult.data;
if (phaseNum === 2)
phase2Data = phaseResult.data;
if (phaseNum === 3)
phase3Data = phaseResult.data;
}
// Calculate totals from the final phase data
const finalPhaseData = phases.find(p => p.phase === 4)?.data;
const totalNodes = finalPhaseData?.finalNodes?.length || 0;
const totalEdges = finalPhaseData?.finalEdges?.length || 0;
return {
totalNodes,
totalEdges,
processedFiles: phase1Data?.files?.length || 0,
isIncremental: indexingStrategy.isIncremental,
duration: Date.now() - startTime,
phases,
errors: [...this.errors],
};
}
catch (error) {
this.logger.error('Phase execution failed', { error });
throw error;
}
}
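    /**
     * Executes a single phase with resume support: a phase already marked
     * complete is loaded from the database instead of re-run (unless listed
     * in options.forcePhases), and its start, completion, or failure is
     * recorded through the PhaseRepository.
     */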
async executePhase(phaseNum, context) {
const startTime = Date.now();
const phaseName = this.getPhaseNameByNumber(phaseNum);
this.logger.info(`=== Phase ${phaseNum}: ${phaseName} ===`);
try {
// Check if phase should be skipped
const shouldForce = context.options.forcePhases?.includes(phaseNum);
const isComplete = await this.phaseRepo.isPhaseComplete(context.repoId, phaseName);
if (isComplete && !shouldForce) {
this.logger.info(`Phase ${phaseNum} already complete, loading from database`);
const data = await this.loadPhaseData(phaseNum, context.repoId);
return {
phase: phaseNum,
name: phaseName,
duration: Date.now() - startTime,
itemsProcessed: this.getItemCount(data),
fromCache: true,
data,
};
}
// Mark phase as started
await this.phaseRepo.markPhaseStarted(context.repoId, phaseName, context.indexingStrategy.currentCommitHash);
// Execute the actual phase
const data = await this.executePhaseLogic(phaseNum, context);
// Persist phase data
if (!context.options.dryRun) {
await this.persistPhaseData(phaseNum, context.repoId, data);
await this.phaseRepo.markPhaseCompleted(context.repoId, phaseName, {
itemsProcessed: this.getItemCount(data),
});
}
this.logger.info(`Phase ${phaseNum} completed successfully`, {
itemsProcessed: this.getItemCount(data),
duration: Date.now() - startTime,
});
return {
phase: phaseNum,
name: phaseName,
duration: Date.now() - startTime,
itemsProcessed: this.getItemCount(data),
fromCache: false,
data,
};
}
catch (error) {
await this.phaseRepo.markPhaseFailed(context.repoId, phaseName, error.message);
throw error;
}
}
async executePhaseLogic(phaseNum, context) {
switch (phaseNum) {
case 1:
return await this.executePhase1(context);
case 2:
return await this.executePhase2(context);
case 3:
return await this.executePhase3(context);
case 4:
return await this.executePhase4(context);
default:
throw new Error(`Unknown phase: ${phaseNum}`);
}
}
async executePhase1(context) {
// Phase 1: Data Discovery
const fileDiscovery = new indexing_1.FileDiscovery(this.projectRoot, this.config);
const filesToProcess = await fileDiscovery.discoverFiles(context.indexingStrategy.changedFiles);
const nodeCreator = new indexing_1.NodeCreator();
const repoNode = nodeCreator.createRepositoryNode(this.projectRoot);
const fileNodes = nodeCreator.createFileNodes(filesToProcess, context.repoId); // Use consistent repo ID
        // Convert to DTOs
        const repositoryDTO = new models_1.RepositoryDTO(
            context.repoId, // Use consistent repo ID
            repoNode.properties.repoPath,
            repoNode.properties.repoName
        );
        const fileDTOs = fileNodes.map((node) => new models_1.FileDTO(
            node.id,
            node.properties.repoId,
            node.properties.filePath,
            node.properties.fileName,
            {
                file_extension: node.properties.fileExtension,
                language: node.properties.language,
                size_kb: node.properties.sizeKb,
                content_hash: node.properties.contentHash,
                file_type: node.properties.fileType,
            }
        ));
return {
repository: repositoryDTO,
files: fileDTOs,
originalFileMetadata: filesToProcess,
};
}
async executePhase2(context) {
// Phase 2: AST Parsing and Structure Extraction
const astExtractor = new indexing_1.AstExtractor(this.projectRoot, this.config, context.phase1Data.repository.id);
const pathToIdMap = new Map();
context.phase1Data.files.forEach((file) => {
pathToIdMap.set(file.file_path, file.id);
});
const { nodes: astNodes, edges: astEdges } = await astExtractor.extract(context.phase1Data.originalFileMetadata, pathToIdMap);
return {
astNodes,
astEdges,
};
}
async executePhase3(context) {
// Phase 3: AI Summary Generation
if (context.options.skipAISummary) {
return { summaries: [] };
}
const summaryExtractor = new indexing_1.SummaryExtractor(this.config);
const nodesToSummarize = [
...context.phase1Data.files,
...context.phase2Data.astNodes,
];
const nodesWithSummaries = await summaryExtractor.extract(nodesToSummarize);
// Sanitize the result to avoid circular references in logging
const sanitizedSummaries = nodesWithSummaries.map(node => {
if (node.type === 'FunctionNode') {
// Remove potentially circular reference arrays for logging
const { callsMethods, calledByMethods, internalCallGraph, ...safeProperties } = node.properties;
return {
...node,
properties: {
...safeProperties,
callsMethodsCount: callsMethods?.length || 0,
calledByMethodsCount: calledByMethods?.length || 0,
internalCallGraphCount: internalCallGraph?.length || 0
}
};
}
return node;
});
return {
summaries: sanitizedSummaries,
summariesCount: nodesWithSummaries.length
};
}
async executePhase4(context) {
// Phase 4: Final Assembly and Vector Embeddings
// Aggregate all nodes and edges from previous phases
const allNodes = [];
const allEdges = [];
// Add AST nodes and edges from phase 2
if (context.phase2Data?.astNodes) {
allNodes.push(...context.phase2Data.astNodes);
}
if (context.phase2Data?.astEdges) {
allEdges.push(...context.phase2Data.astEdges);
}
// Add enriched nodes from phase 3 (summaries)
if (context.phase3Data?.summaries) {
// Replace or merge with existing nodes that have summaries
const summaryMap = new Map();
context.phase3Data.summaries.forEach((node) => {
summaryMap.set(node.id, node);
});
            // Replace existing nodes with their summary-enriched versions,
            // then append any summary-only nodes that were not already present
            for (let i = 0; i < allNodes.length; i++) {
                const existingNode = allNodes[i];
                if (summaryMap.has(existingNode.id)) {
                    allNodes[i] = summaryMap.get(existingNode.id);
                    summaryMap.delete(existingNode.id);
                }
            }
            allNodes.push(...summaryMap.values());
}
// Phase 4 Part 2: Vector Embeddings
if (context.options.skipEmbeddings) {
this.logger.info('Skipping vector embedding generation.');
return {
finalNodes: allNodes,
finalEdges: allEdges,
};
}
this.logger.info('Starting vector embedding generation...');
        // Diagnostic: surface any nodes that reached this point without a type
        const untypedNodes = allNodes.filter((node) => node.type === undefined);
        this.logger.info(`Nodes missing a type: ${untypedNodes.length}`, untypedNodes);
const embeddingExtractor = new indexing_1.EmbeddingExtractor(this.config);
const nodesWithEmbeddings = await embeddingExtractor.extract(allNodes);
this.logger.info(`Phase 4 final assembly and embedding completed`, {
totalNodes: nodesWithEmbeddings.length,
totalEdges: allEdges.length
});
return {
finalNodes: nodesWithEmbeddings,
finalEdges: allEdges,
};
}
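    /**
     * Persists a phase's output. Phase 3 summaries are not persisted
     * separately yet (see the TODO below); they are folded into the phase 4
     * final nodes instead.
     */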
async persistPhaseData(phaseNum, repoId, data) {
switch (phaseNum) {
case 1:
await this.phaseRepo.persistPhase1Data(data);
break;
case 2:
await this.phaseRepo.persistPhase2Data({
repoId,
astNodes: data.astNodes || [],
astEdges: data.astEdges || []
});
break;
case 4:
await this.phaseRepo.persistPhase4Data({
repoId,
finalNodes: data.finalNodes || [],
finalEdges: data.finalEdges || []
});
break;
// TODO: Add phase 3
}
}
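    /**
     * Loads previously persisted phase data so a completed phase can be
     * served from cache; only phases 1 and 2 are loadable so far (see TODO).
     */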
async loadPhaseData(phaseNum, repoId) {
switch (phaseNum) {
case 1:
return await this.phaseRepo.loadPhase1Data(repoId);
case 2:
return await this.phaseRepo.loadPhase2Data(repoId);
// TODO: Add other phases
default:
return null;
}
}
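    /**
     * Resolves which phases to run: an explicit runPhases list wins,
     * fromPhase runs that phase and everything after it up to phase 4, and
     * the default is all four phases.
     */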
determinePhasesToRun(options) {
if (options.runPhases) {
return options.runPhases;
}
if (options.fromPhase) {
return [
options.fromPhase,
options.fromPhase + 1,
options.fromPhase + 2,
options.fromPhase + 3,
].filter((p) => p <= 4);
}
return [1, 2, 3, 4]; // All phases by default
}
getPhaseNameByNumber(phaseNum) {
const names = {
1: 'data_discovery',
2: 'structure_extraction',
3: 'ai_enrichment',
4: 'final_assembly',
};
return names[phaseNum] || `phase_${phaseNum}`;
}
    getItemCount(data) {
        if (!data)
            return 0;
        if (data.files)
            return data.files.length;
        if (data.astNodes)
            return data.astNodes.length;
        if (data.summaries)
            return data.summaries.length;
        if (data.finalNodes)
            return data.finalNodes.length;
        return 0;
}
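    /**
     * Derives a stable repository ID: the first 16 hex characters of the
     * SHA-256 of the resolved project path, so repeated runs over the same
     * checkout map to the same database records.
     */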
getRepoId() {
// Generate consistent repo ID based on project path
const crypto = require('crypto');
const normalizedPath = path.resolve(this.projectRoot);
return crypto.createHash('sha256').update(normalizedPath).digest('hex').substring(0, 16);
}
async ensureRepositoryExists(repoId) {
try {
// Check if repository already exists
const existingRepo = await this.phaseRepo.loadPhase1Data(repoId);
            if (existingRepo?.repository) {
this.logger.debug('Repository record already exists', { repoId });
return;
}
// Create repository record
const nodeCreator = new indexing_1.NodeCreator();
const repoNode = nodeCreator.createRepositoryNode(this.projectRoot);
            const repositoryDTO = new models_1.RepositoryDTO(
                repoId, // Use consistent repo ID
                repoNode.properties.repoPath,
                repoNode.properties.repoName
            );
// Persist repository record
await this.phaseRepo.persistPhase1Data({
repository: repositoryDTO,
files: []
});
this.logger.debug('Repository record created', { repoId, repoPath: repositoryDTO.repo_path });
}
catch (error) {
this.logger.warn('Failed to ensure repository exists, will create during Phase 1', {
error: error.message
});
// Don't throw - Phase 1 will handle repository creation
}
}
async showPhaseStatus() {
const repoId = this.getRepoId();
const statuses = await this.phaseRepo.getPhaseStatuses(repoId);
console.log('\n=== Phase Status ===');
console.table(statuses.map((s) => ({
Phase: s.phase_name,
Status: s.status,
'Started At': s.started_at,
'Completed At': s.completed_at,
'Commit Hash': s.commit_hash?.substring(0, 8),
})));
}
async inspectPhase(phaseNum) {
const repoId = this.getRepoId();
const data = await this.loadPhaseData(phaseNum, repoId);
console.log(`\n=== Phase ${phaseNum} Data ===`);
console.log(JSON.stringify(data, null, 2));
}
createEmptyResult() {
return {
totalNodes: 0,
totalEdges: 0,
processedFiles: 0,
isIncremental: false,
duration: 0,
phases: [],
errors: [],
};
}
async cleanup() {
if (this.sqliteClient) {
this.sqliteClient.disconnect();
}
}
}
exports.PhaseManager = PhaseManager;