hikma-engine
Version:
Code Knowledge Graph Indexer - A sophisticated TypeScript-based indexer that transforms Git repositories into multi-dimensional knowledge stores for AI agents
210 lines (209 loc) • 10.6 kB
JavaScript
;
// Flag this CommonJS module with the conventional `__esModule` marker used by
// ES-module interop helpers.
Object.defineProperty(exports, "__esModule", { value: true });
// Pre-declare the named export; it is assigned its real value after the class
// definition at the end of the file.
exports.PhaseRepository = void 0;
const crypto_1 = require("crypto");
const GenericRepository_1 = require("./repository/GenericRepository");
const models_1 = require("./models");
/**
 * Phase-specific repository that handles persistence for each indexing phase.
 *
 * Owns one GenericRepository per table ('repositories', 'files',
 * 'phase_status', 'graph_nodes', 'graph_edges', 'embedding_nodes') and groups
 * multi-table writes through `db.transaction(...)`.
 */
class PhaseRepository {
    /**
     * @param {object} db Database handle. It is handed to every
     *   GenericRepository and must expose `transaction(fn)`, whose return
     *   value is invoked to run `fn`.
     */
    constructor(db) {
        this.db = db;
        this.repositoryRepo = new GenericRepository_1.GenericRepository(db, 'repositories');
        this.fileRepo = new GenericRepository_1.GenericRepository(db, 'files');
        this.phaseStatusRepo = new GenericRepository_1.GenericRepository(db, 'phase_status');
        this.graphNodeRepo = new GenericRepository_1.GenericRepository(db, 'graph_nodes');
        this.graphEdgeRepo = new GenericRepository_1.GenericRepository(db, 'graph_edges');
        this.embeddingNodeRepo = new GenericRepository_1.GenericRepository(db, 'embedding_nodes');
    }
    // ============================================================================
    // PHASE STATUS MANAGEMENT
    // ============================================================================
    /**
     * Reports whether a phase has a status record marked 'completed'.
     *
     * @param {string} repoId Repository identifier.
     * @param {string} phaseName Phase name.
     * @returns {Promise<boolean>} true only when the first matching record has
     *   status 'completed'.
     */
    async isPhaseComplete(repoId, phaseName) {
        const matches = await this.phaseStatusRepo.search({
            repo_id: repoId,
            phase_name: phaseName
        });
        return matches.length > 0 && matches[0].status === 'completed';
    }
    /**
     * Stores a 'running' status record for a phase, keyed `${repoId}-${phaseName}`.
     *
     * @param {string} repoId Repository identifier.
     * @param {string} phaseName Phase name.
     * @param {string} [commitHash] Commit the phase runs against; stored only
     *   when truthy.
     * @returns {Promise<void>}
     */
    async markPhaseStarted(repoId, phaseName, commitHash) {
        const phaseId = `${repoId}-${phaseName}`;
        const phaseStatus = new models_1.PhaseStatusDTO(phaseId, repoId, phaseName, 'running');
        phaseStatus.started_at = new Date().toISOString();
        if (commitHash) {
            phaseStatus.commit_hash = commitHash;
        }
        await this.phaseStatusRepo.add(phaseStatus);
    }
    /**
     * Marks an existing phase record as 'completed' and stamps completed_at.
     * Does nothing when no record exists for the phase.
     *
     * @param {string} repoId Repository identifier.
     * @param {string} phaseName Phase name.
     * @param {object} [stats] Optional statistics; stored as a JSON string.
     * @returns {Promise<void>}
     */
    async markPhaseCompleted(repoId, phaseName, stats) {
        const phaseId = `${repoId}-${phaseName}`;
        const existing = await this.phaseStatusRepo.get(phaseId);
        if (!existing) {
            return;
        }
        existing.status = 'completed';
        existing.completed_at = new Date().toISOString();
        if (stats) {
            existing.stats = JSON.stringify(stats);
        }
        await this.phaseStatusRepo.add(existing);
    }
    /**
     * Marks an existing phase record as 'failed' and stores the error in its
     * stats field as `{"error": ...}`. Does nothing when no record exists.
     *
     * @param {string} repoId Repository identifier.
     * @param {string} phaseName Phase name.
     * @param {string|Error} error Failure description. For an Error instance
     *   the message is stored.
     * @returns {Promise<void>}
     */
    async markPhaseFailed(repoId, phaseName, error) {
        const phaseId = `${repoId}-${phaseName}`;
        const existing = await this.phaseStatusRepo.get(phaseId);
        if (!existing) {
            return;
        }
        existing.status = 'failed';
        // Error's message/stack are non-enumerable, so JSON.stringify(new Error('x'))
        // produces "{}". Persist the message so the failure reason survives.
        const errorDetail = error instanceof Error ? error.message : error;
        existing.stats = JSON.stringify({ error: errorDetail });
        await this.phaseStatusRepo.add(existing);
    }
    /**
     * Returns every phase status record stored for a repository.
     *
     * @param {string} repoId Repository identifier.
     * @returns {Promise<Array<object>>} Matching phase status records.
     */
    async getPhaseStatuses(repoId) {
        return await this.phaseStatusRepo.search({ repo_id: repoId });
    }
    // ============================================================================
    // PHASE 1: DATA DISCOVERY PERSISTENCE
    // ============================================================================
    /**
     * Saves the repository record and its files inside one transaction.
     *
     * @param {{repository: object, files: Array<object>}} data Phase 1 output.
     * @returns {Promise<void>}
     */
    async persistPhase1Data(data) {
        // NOTE(review): add/batchAdd are not awaited inside the transaction
        // callback, while `add` is awaited elsewhere in this class - confirm the
        // repository writes complete synchronously.
        const runTransaction = this.db.transaction(() => {
            this.repositoryRepo.add(data.repository);
            if (data.files.length > 0) {
                this.fileRepo.batchAdd(data.files);
            }
        });
        runTransaction();
    }
    /**
     * Loads the repository record and its file records.
     *
     * @param {string} repoId Repository identifier.
     * @returns {Promise<{repository: object, files: Array<object>}>}
     */
    async loadPhase1Data(repoId) {
        const repository = await this.repositoryRepo.get(repoId);
        const files = await this.fileRepo.search({ repo_id: repoId });
        return { repository, files };
    }
    // ============================================================================
    // PHASE 2: STRUCTURE EXTRACTION PERSISTENCE
    // ============================================================================
    /**
     * Converts AST nodes and edges to graph DTOs and saves them inside one
     * transaction. Edges are optional.
     *
     * @param {{repoId: string, astNodes: Array<object>, astEdges?: Array<object>}} data
     *   Phase 2 output.
     * @returns {Promise<void>}
     */
    async persistPhase2Data(data) {
        // NOTE(review): batchAdd is not awaited here - see persistPhase1Data.
        const runTransaction = this.db.transaction(() => {
            const graphNodeDTOs = data.astNodes.map((node) => this.convertAstNodeToGraphNodeDTO(node, data.repoId));
            if (graphNodeDTOs.length > 0) {
                this.graphNodeRepo.batchAdd(graphNodeDTOs);
            }
            if (data.astEdges && data.astEdges.length > 0) {
                const graphEdgeDTOs = data.astEdges.map((edge) => this.convertAstEdgeToGraphEdgeDTO(edge));
                this.graphEdgeRepo.batchAdd(graphEdgeDTOs);
            }
        });
        runTransaction();
    }
    /**
     * Loads the graph nodes of a repository plus every edge whose source or
     * target is one of those nodes.
     *
     * @param {string} repoId Repository identifier.
     * @returns {Promise<{astNodes: Array<object>, astEdges: Array<object>}>}
     */
    async loadPhase2Data(repoId) {
        const astNodes = await this.graphNodeRepo.search({
            repo_id: repoId
        });
        let astEdges = [];
        if (astNodes.length > 0) {
            // A Set gives O(1) membership checks instead of scanning the id
            // array for every edge.
            const nodeIds = new Set(astNodes.map((node) => node.id));
            // Simplified approach: every edge is fetched and filtered in memory.
            const allEdges = await this.graphEdgeRepo.getAll();
            // Assign the filtered array directly; spreading a very large array
            // into push() can exceed the engine's argument limit.
            astEdges = allEdges.filter((edge) => nodeIds.has(edge.source_id) || nodeIds.has(edge.target_id));
        }
        return { astNodes, astEdges };
    }
    /**
     * Builds a GraphNodeDTO from an AST node.
     *
     * The id doubles as the business key, all properties are stored as a JSON
     * string, and a few properties (filePath, startLine, startColumn) are
     * copied into dedicated fields for easier querying.
     *
     * @param {object} astNode Node with `id`, `type` and optional `properties`.
     * @param {string} repoId Repository the node belongs to.
     * @returns {object} A models GraphNodeDTO instance.
     */
    convertAstNodeToGraphNodeDTO(astNode, repoId) {
        const properties = astNode.properties || {};
        // String() leaves strings untouched and coerces everything else.
        const nodeId = String(astNode.id);
        const nodeType = String(astNode.type);
        return new models_1.GraphNodeDTO(nodeId, nodeId, // business_key same as id for AST nodes
        nodeType, JSON.stringify(properties), // Store all properties as JSON
        {
            repo_id: repoId,
            file_path: properties.filePath ? String(properties.filePath) : undefined,
            line: typeof properties.startLine === 'number' ? properties.startLine : undefined,
            col: typeof properties.startColumn === 'number' ? properties.startColumn : 0,
            signature_hash: this.generateSignatureHash(properties.signature || properties.name || nodeId)
        });
    }
    /**
     * Returns the hex MD5 digest of a signature. Used as a fingerprint only,
     * not for security.
     *
     * @param {*} signature Value to hash; coerced with String() so non-string
     *   signatures or names do not make `hash.update` throw.
     * @returns {string} 32-character lowercase hex digest.
     */
    generateSignatureHash(signature) {
        return crypto_1.createHash('md5').update(String(signature)).digest('hex');
    }
    /**
     * Builds a GraphEdgeDTO from an AST edge.
     *
     * Source and target ids double as the business keys. A missing (falsy,
     * non-string) id is replaced with a generated `edge_<timestamp>_<random>` id.
     *
     * @param {object} astEdge Edge with `source`, `target`, `type` and optional
     *   `id`, `properties`, `line`, `col`, `dynamic`.
     * @returns {object} A models GraphEdgeDTO instance.
     */
    convertAstEdgeToGraphEdgeDTO(astEdge) {
        const edgeId = typeof astEdge.id === 'string'
            ? astEdge.id
            : String(astEdge.id || `edge_${Date.now()}_${Math.random()}`);
        const sourceId = String(astEdge.source);
        const targetId = String(astEdge.target);
        const edgeType = String(astEdge.type);
        return new models_1.GraphEdgeDTO(edgeId, sourceId, targetId, sourceId, // business_key same as source for AST edges
        targetId, // business_key same as target for AST edges
        edgeType, {
            properties: astEdge.properties ? JSON.stringify(astEdge.properties) : undefined,
            line: typeof astEdge.line === 'number' ? astEdge.line : undefined,
            col: typeof astEdge.col === 'number' ? astEdge.col : undefined,
            dynamic: Boolean(astEdge.dynamic)
        });
    }
    // ============================================================================
    // PHASE 4: FINAL GRAPH AND EMBEDDING PERSISTENCE
    // ============================================================================
    /**
     * Saves the final nodes, their embeddings and the final edges inside one
     * transaction.
     *
     * Each node is stored as a graph node without its `embedding`; when an
     * embedding is present it is stored separately as an embedding record
     * (JSON-serialized vector, source text, node type, file path) keyed by the
     * same stringified id as the graph node.
     *
     * @param {{repoId: string, finalNodes: Array<object>, finalEdges?: Array<object>}} data
     *   Final nodes and edges; a missing `finalEdges` is treated as empty.
     * @returns {Promise<void>}
     */
    async persistPhase4Data(data) {
        // NOTE(review): batchAdd is not awaited here - see persistPhase1Data.
        const runTransaction = this.db.transaction(() => {
            const graphNodeDTOs = [];
            const embeddingNodeDTOs = [];
            for (const node of data.finalNodes) {
                // The embedding lives in its own table, so strip it from the graph node.
                const { embedding, ...nodeWithoutEmbedding } = node;
                graphNodeDTOs.push(this.convertAstNodeToGraphNodeDTO(nodeWithoutEmbedding, data.repoId));
                if (!embedding) {
                    continue;
                }
                const properties = node.properties || {};
                // Use the same stringified id as the graph node so both records
                // share one key even when node.id is not a string.
                const nodeId = String(node.id);
                embeddingNodeDTOs.push(new models_1.EmbeddingNodeDTO(nodeId, // Use node ID as the ID for the embedding record
                nodeId, JSON.stringify(embedding), // Serialize embedding array to JSON string
                node.sourceText, // The text that was embedded
                node.type, // Node type (e.g., 'CodeNode')
                properties.filePath || null));
            }
            console.log(`[DEBUG] About to persist ${graphNodeDTOs.length} graph nodes and ${embeddingNodeDTOs.length} embedding nodes`);
            if (graphNodeDTOs.length > 0) {
                this.graphNodeRepo.batchAdd(graphNodeDTOs);
            }
            if (embeddingNodeDTOs.length > 0) {
                console.log(`[DEBUG] Persisting ${embeddingNodeDTOs.length} embedding nodes`);
                this.embeddingNodeRepo.batchAdd(embeddingNodeDTOs);
                console.log(`[DEBUG] Successfully persisted embedding nodes`);
            }
            // Edges are optional, mirroring the astEdges guard in persistPhase2Data.
            const graphEdgeDTOs = (data.finalEdges ?? []).map((edge) => this.convertAstEdgeToGraphEdgeDTO(edge));
            if (graphEdgeDTOs.length > 0) {
                this.graphEdgeRepo.batchAdd(graphEdgeDTOs);
            }
        });
        runTransaction();
    }
    // ============================================================================
    // UTILITY METHODS
    // ============================================================================
    /**
     * Placeholder for clearing the data of one phase so it can be re-run.
     * Currently a no-op: nothing is deleted.
     *
     * @param {string} repoId Repository identifier (unused).
     * @param {string} phaseName Phase name (unused).
     * @returns {Promise<void>}
     */
    async clearPhaseData(repoId, phaseName) {
        // Implementation depends on which phase data to clear
        // This allows for re-running specific phases
    }
    /**
     * Summarizes all phase status records of a repository, keyed by phase name.
     *
     * @param {string} repoId Repository identifier.
     * @returns {Promise<Object<string, {status: string, started_at: string,
     *   completed_at: string, stats: (object|null)}>>} Per-phase summary; `stats`
     *   is the parsed JSON value or null when none was stored.
     * @throws {SyntaxError} When a stored stats string is not valid JSON.
     */
    async getPhaseStats(repoId) {
        const statuses = await this.getPhaseStatuses(repoId);
        const stats = {};
        for (const status of statuses) {
            stats[status.phase_name] = {
                status: status.status,
                started_at: status.started_at,
                completed_at: status.completed_at,
                stats: status.stats ? JSON.parse(status.stats) : null
            };
        }
        return stats;
    }
}
// Publish the class as the module's named export.
exports.PhaseRepository = PhaseRepository;