hikma-engine

Code Knowledge Graph Indexer - A sophisticated TypeScript-based indexer that transforms Git repositories into multi-dimensional knowledge stores for AI agents

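What follows is the compiled data-loader module. As a reader's aid, here is a sketch of the input shapes it expects, inferred from its validateNodes and validateEdges methods near the end of the file; the typedefs below are an annotation, not part of the published source:

/**
 * @typedef {Object} NodeWithEmbedding
 * @property {string}   id         Non-empty unique identifier.
 * @property {string}   type       e.g. 'FileNode', 'FunctionNode', 'CommitNode'.
 * @property {Object}   properties Per-type fields; see prepareSQLiteStatements below.
 * @property {number[]} embedding  May be empty (skipEmbeddings mode).
 */
/**
 * @typedef {Object} Edge
 * @property {string} source        Must reference an existing node id.
 * @property {string} target        Must reference an existing node id.
 * @property {string} type          e.g. 'CALLS'.
 * @property {Object} [properties]  Optional; must be an object when present.
 */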
"use strict"; /** * @file Responsible for loading processed nodes and edges into the unified SQLite persistence layer. * Manages data persistence across SQLite database with vector support via sqlite-vec extension. */ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { if (k2 === undefined) k2 = k; var desc = Object.getOwnPropertyDescriptor(m, k); if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { desc = { enumerable: true, get: function() { return m[k]; } }; } Object.defineProperty(o, k2, desc); }) : (function(o, m, k, k2) { if (k2 === undefined) k2 = k; o[k2] = m[k]; })); var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { Object.defineProperty(o, "default", { enumerable: true, value: v }); }) : function(o, v) { o["default"] = v; }); var __importStar = (this && this.__importStar) || (function () { var ownKeys = function(o) { ownKeys = Object.getOwnPropertyNames || function (o) { var ar = []; for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k; return ar; }; return ownKeys(o); }; return function (mod) { if (mod && mod.__esModule) return mod; var result = {}; if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]); __setModuleDefault(result, mod); return result; }; })(); Object.defineProperty(exports, "__esModule", { value: true }); exports.DataLoader = void 0; const crypto = __importStar(require("crypto")); const connection_1 = require("../persistence/db/connection"); const logger_1 = require("../utils/logger"); const error_handling_1 = require("../utils/error-handling"); /** * Loads processed data into the unified SQLite database system. */ class DataLoader { /** * Initializes the DataLoader with database connection parameters. * @param {string} sqlitePath - Path to the SQLite database file. * @param {ConfigManager} config - Configuration manager instance. */ constructor(sqlitePath, config) { this.logger = (0, logger_1.getLogger)('DataLoader'); this.sqlitePath = sqlitePath; this.config = config; // Initialize database client this.sqliteClient = new connection_1.SQLiteClient(sqlitePath); this.logger.info('DataLoader initialized', { sqlitePath, }); } /** * Establishes connections to SQLite database. */ async connectToDatabases() { const operation = this.logger.operation('Connecting to SQLite database'); const connectionStatus = { sqlite: false, }; this.logger.info('Connecting to SQLite database'); // Connect to SQLite database with error handling try { await this.connectToSQLite(); connectionStatus.sqlite = true; this.logger.info('SQLite connected successfully'); } catch (error) { this.logger.error('Failed to connect to SQLite', { error: (0, error_handling_1.getErrorMessage)(error) }); throw error; } const connectedCount = Object.values(connectionStatus).filter(Boolean).length; this.logger.info(`Connected to ${connectedCount}/1 databases`, connectionStatus); // Require SQLite database to be connected if (connectedCount === 0) { throw new error_handling_1.DatabaseConnectionError('SQLite', 'Failed to connect to SQLite database'); } operation(); return connectionStatus; } /** * Connects to SQLite with retry logic and circuit breaker. 
*/ async connectToSQLite() { try { await (0, error_handling_1.withRetry)(async () => { await this.sqliteClient.connect(); }, error_handling_1.DEFAULT_RETRY_CONFIG, this.logger, 'SQLite connection'); } catch (error) { this.logger.error('Failed to connect to SQLite after all retries', { error: (0, error_handling_1.getErrorMessage)(error) }); throw new error_handling_1.DatabaseConnectionError('SQLite', `Connection failed: ${(0, error_handling_1.getErrorMessage)(error)}`, error); } } /** * Disconnects from SQLite database. */ async disconnectFromDatabases() { const operation = this.logger.operation('Disconnecting from SQLite database'); this.logger.info('Disconnecting from SQLite database'); // Disconnect from SQLite with error handling try { this.sqliteClient.disconnect(); this.logger.info('SQLite disconnected successfully'); } catch (error) { this.logger.warn('Failed to disconnect from SQLite', { error: (0, error_handling_1.getErrorMessage)(error) }); } this.logger.info('SQLite database disconnection completed'); operation(); } /** * Loads nodes and edges into SQLite enhanced graph storage for deep relationship queries. * @param {NodeWithEmbedding[]} nodes - Array of nodes. * @param {Edge[]} edges - Array of edges. */ async batchLoadToGraphDB(nodes, edges) { const operation = this.logger.operation(`Loading ${nodes.length} nodes and ${edges.length} edges to Enhanced SQLite Graph`); try { this.logger.info(`Starting enhanced SQLite graph batch load for ${nodes.length} nodes and ${edges.length} edges`); // Convert NodeWithEmbedding to EnhancedBaseNode format const enhancedNodes = nodes.map(node => ({ id: node.id, businessKey: node.id, // For now, use ID as business key - will be enhanced by AST parser type: node.type, properties: node.properties, repoId: node.properties.repoId || node.properties.repoPath, commitSha: undefined, // Will be set by git analyzer filePath: node.properties.filePath || node.properties.path, line: node.properties.startLine, col: node.properties.startCol, signatureHash: this.generateSignatureHash(node), labels: node.properties.labels || [] })); // Convert Edge to EnhancedEdge format const enhancedEdges = edges.map(edge => ({ id: `${edge.source}-${edge.type}-${edge.target}`, source: edge.source, target: edge.target, sourceBusinessKey: edge.source, // Will be enhanced targetBusinessKey: edge.target, // Will be enhanced type: edge.type, properties: edge.properties, line: edge.properties?.line, col: edge.properties?.col, dynamic: edge.properties?.dynamic || false })); // TODO: Implement enhanced graph storage methods in SQLiteClient // For now, log the data that would be inserted this.logger.debug(`Would insert ${enhancedNodes.length} enhanced nodes and ${enhancedEdges.length} enhanced edges to SQLite graph`); // Placeholder results for now const nodeResult = { success: enhancedNodes.length, failed: 0, errors: [] }; const edgeResult = { success: enhancedEdges.length, failed: 0, errors: [] }; this.logger.info('Enhanced SQLite graph batch load completed', { nodesAdded: nodeResult.success, nodesFailed: nodeResult.failed, edgesAdded: edgeResult.success, edgesFailed: edgeResult.failed, totalNodes: nodes.length, totalEdges: edges.length }); operation(); } catch (error) { this.logger.error('Enhanced SQLite graph batch load failed', { error: (0, error_handling_1.getErrorMessage)(error) }); operation(); throw error; } } /** * Generate a signature hash for duplicate detection. 
     */
    generateSignatureHash(node) {
        const signature = `${node.type}:${node.properties.name || node.properties.fileName}:${node.properties.signature || ''}`;
        return crypto.createHash('md5').update(signature).digest('hex');
    }
    /**
     * Loads node metadata into SQLite for fast lookups and keyword search with transaction management.
     * @param {NodeWithEmbedding[]} nodes - Array of nodes.
     * @param {Edge[]} edges - Array of edges.
     */
    async batchLoadToSqlite(nodes, edges) {
        const operation = this.logger.operation(`Loading ${nodes.length} nodes to SQLite`);
        try {
            this.logger.info(`Starting SQLite batch load for ${nodes.length} nodes`);
            // Use transaction for data consistency
            this.sqliteClient.transaction(() => {
                // Group nodes by type for efficient batch processing
                const nodesByType = this.groupNodesByType(nodes);
                // Prepare batch insert statements
                const statements = this.prepareSQLiteStatements();
                // Process each node type within the transaction in dependency order
                const insertionOrder = [
                    'RepositoryNode',
                    'FileNode', // Must come before FunctionNode due to foreign key
                    'CodeNode',
                    'CommitNode',
                    'TestNode',
                    'PullRequestNode',
                    'FunctionNode' // Depends on FileNode
                ];
                for (const nodeType of insertionOrder) {
                    const typeNodes = nodesByType[nodeType];
                    if (!typeNodes || typeNodes.length === 0)
                        continue;
                    this.logger.debug(`Loading ${typeNodes.length} ${nodeType} nodes to SQLite`);
                    try {
                        switch (nodeType) {
                            case 'RepositoryNode':
                                this.insertRepositoryNodesSync(typeNodes, statements.repositories);
                                break;
                            case 'FileNode':
                                this.insertFileNodesSync(typeNodes, statements.files);
                                break;
                            case 'CodeNode':
                                this.insertCodeNodesSync(typeNodes, statements.codeNodes);
                                break;
                            case 'CommitNode':
                                this.insertCommitNodesSync(typeNodes, statements.commits);
                                break;
                            case 'TestNode':
                                this.insertTestNodesSync(typeNodes, statements.testNodes);
                                break;
                            case 'PullRequestNode':
                                this.insertPullRequestNodesSync(typeNodes, statements.pullRequests);
                                break;
                            case 'FunctionNode':
                                this.insertFunctionNodesSync(typeNodes, statements.functions);
                                break;
                            default:
                                this.logger.warn(`Unknown node type for SQLite insertion: ${nodeType}`);
                        }
                    }
                    catch (nodeError) {
                        this.logger.error(`Failed to insert ${nodeType} nodes`, { error: (0, error_handling_1.getErrorMessage)(nodeError) });
                        throw nodeError; // This will cause the transaction to rollback
                    }
                }
                // Insert edges within the same transaction
                try {
                    this.insertFunctionCallsSync(edges, statements.functionCalls);
                }
                catch (edgeError) {
                    this.logger.error('Failed to insert function calls', { error: (0, error_handling_1.getErrorMessage)(edgeError) });
                    throw edgeError; // This will cause the transaction to rollback
                }
                // Update indexing state within transaction
                try {
                    const timestamp = new Date().toISOString();
                    this.sqliteClient.run(`INSERT OR REPLACE INTO indexing_state (key, value, updated_at) VALUES (?, ?, ?)`, ['last_load_timestamp', timestamp, timestamp]);
                }
                catch (stateError) {
                    this.logger.warn('Failed to update indexing state', { error: (0, error_handling_1.getErrorMessage)(stateError) });
                    // Don't throw here as this is not critical
                }
            });
            this.logger.info('SQLite batch load completed successfully with transaction');
            operation();
        }
        catch (error) {
            this.logger.error('SQLite batch load failed, transaction rolled back', { error: (0, error_handling_1.getErrorMessage)(error) });
            operation();
            throw error;
        }
    }
    /**
     * Loads nodes with embeddings into unified SQLite storage for both relational and vector operations.
     * Uses the existing transaction-based approach with vector storage.
     * @param {NodeWithEmbedding[]} nodes - Array of nodes with embeddings.
     */
    async batchLoadToSQLiteWithVectors(nodes) {
        const operation = this.logger.operation(`Loading ${nodes.length} nodes with vectors to SQLite`);
        try {
            this.logger.info(`Starting unified SQLite batch load for ${nodes.length} nodes with embeddings`);
            // Use the existing transaction-based approach which already works
            await this.batchLoadToSqlite(nodes, []); // Pass empty edges array since we handle them separately
            // Store vector embeddings for nodes that have them
            await this.storeVectorEmbeddings(nodes);
            this.logger.info('Unified SQLite batch load with vectors completed successfully');
            operation();
        }
        catch (error) {
            this.logger.error('Unified SQLite batch load with vectors failed', { error: (0, error_handling_1.getErrorMessage)(error) });
            operation();
            throw error;
        }
    }
    /**
     * Stores vector embeddings for nodes using the existing SQLiteClient vector operations.
     * @param {NodeWithEmbedding[]} nodes - Array of nodes with embeddings.
     */
    async storeVectorEmbeddings(nodes) {
        if (!this.sqliteClient.isVectorEnabled) {
            this.logger.info('Vector operations not enabled, skipping embedding storage');
            return;
        }
        this.logger.debug(`Storing embeddings for ${nodes.length} nodes`);
        for (const node of nodes) {
            if (!node.embedding || node.embedding.length === 0) {
                continue; // Skip nodes without embeddings
            }
            try {
                // Determine the appropriate table and column based on node type
                let table;
                let column;
                switch (node.type) {
                    case 'FileNode':
                        table = 'files';
                        column = 'content_embedding';
                        break;
                    case 'FunctionNode':
                        table = 'functions';
                        column = 'signature_embedding';
                        break;
                    case 'CommitNode':
                        table = 'commits';
                        column = 'message_embedding';
                        break;
                    case 'CodeNode':
                        table = 'code_nodes';
                        column = 'code_embedding';
                        break;
                    case 'TestNode':
                        table = 'test_nodes';
                        column = 'test_embedding';
                        break;
                    case 'PullRequestNode':
                        table = 'pull_requests';
                        column = 'title_embedding';
                        break;
                    default:
                        this.logger.debug(`No embedding storage configured for node type: ${node.type}`);
                        continue;
                }
                await this.sqliteClient.storeVector(table, column, node.id, node.embedding);
            }
            catch (error) {
                this.logger.warn(`Failed to store embedding for node ${node.id}`, {
                    error: (0, error_handling_1.getErrorMessage)(error),
                    nodeType: node.type
                });
            }
        }
        this.logger.debug('Vector embedding storage completed');
    }
    /**
     * Groups nodes by their type for efficient processing.
     * @param {NodeWithEmbedding[]} nodes - Array of nodes to group.
     * @returns {Record<string, NodeWithEmbedding[]>} Nodes grouped by type.
     */
    groupNodesByType(nodes) {
        const grouped = {};
        for (const node of nodes) {
            if (!grouped[node.type]) {
                grouped[node.type] = [];
            }
            grouped[node.type].push(node);
        }
        return grouped;
    }
    /**
     * Prepares SQLite prepared statements for batch inserts.
     * @returns {Record<string, any>} Prepared statements for each table.
     */
    prepareSQLiteStatements() {
        return {
            repositories: this.sqliteClient.prepare(`
                INSERT OR REPLACE INTO repositories
                (repo_id, repo_path, repo_name, created_at, last_updated)
                VALUES (?, ?, ?, ?, ?)
            `),
            files: this.sqliteClient.prepare(`
                INSERT OR REPLACE INTO files
                (file_id, repo_id, file_path, file_name, file_extension, language, size_kb, content_hash,
                 file_type, ai_summary, imports, exports, created_at, updated_at)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            `),
            codeNodes: this.sqliteClient.prepare(`
                INSERT OR REPLACE INTO code_nodes
                (id, name, signature, language, file_path, start_line, end_line)
                VALUES (?, ?, ?, ?, ?, ?, ?)
            `),
            commits: this.sqliteClient.prepare(`
                INSERT OR REPLACE INTO commits
                (id, hash, author, date, message, diff_summary)
                VALUES (?, ?, ?, ?, ?, ?)
            `),
            testNodes: this.sqliteClient.prepare(`
                INSERT OR REPLACE INTO test_nodes
                (id, name, file_path, start_line, end_line, framework)
                VALUES (?, ?, ?, ?, ?, ?)
            `),
            pullRequests: this.sqliteClient.prepare(`
                INSERT OR REPLACE INTO pull_requests
                (id, pr_id, title, author, created_at_pr, merged_at, url, body)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?)
            `),
            functions: this.sqliteClient.prepare(`
                INSERT OR REPLACE INTO functions
                (id, file_id, name, signature, return_type, access_level, file_path, start_line, end_line,
                 body, called_by_methods, calls_methods, uses_external_methods, internal_call_graph, transitive_call_depth)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            `),
            functionCalls: this.sqliteClient.prepare(`
                INSERT OR REPLACE INTO function_calls
                (id, caller_id, callee_id)
                VALUES (?, ?, ?)
            `),
        };
    }
    /**
     * Inserts FileNode data into SQLite.
     */
    async insertFileNodes(nodes, statement) {
        for (const node of nodes) {
            try {
                statement.run(node.id, // file_id
                node.properties.repoId || '', // repo_id
                node.properties.filePath, // file_path
                node.properties.fileName, // file_name
                node.properties.fileExtension || null, // file_extension
                node.properties.language || null, // language
                node.properties.sizeKb || null, // size_kb
                node.properties.contentHash || null, // content_hash
                node.properties.fileType || 'source', // file_type
                node.properties.aiSummary || null, // ai_summary
                JSON.stringify(node.properties.imports || []), // imports
                JSON.stringify(node.properties.exports || []), // exports
                new Date().toISOString(), // created_at
                new Date().toISOString() // updated_at
                );
            }
            catch (error) {
                this.logger.warn(`Failed to insert FileNode: ${node.id}`, { error: (0, error_handling_1.getErrorMessage)(error) });
            }
        }
    }
    /**
     * Inserts CodeNode data into SQLite.
     */
    async insertCodeNodes(nodes, statement) {
        for (const node of nodes) {
            try {
                statement.run(node.id, node.properties.name, node.properties.signature || null, node.properties.language, node.properties.filePath, node.properties.startLine, node.properties.endLine);
            }
            catch (error) {
                this.logger.warn(`Failed to insert CodeNode: ${node.id}`, { error: (0, error_handling_1.getErrorMessage)(error) });
            }
        }
    }
    /**
     * Inserts CommitNode data into SQLite.
     */
    async insertCommitNodes(nodes, statement) {
        for (const node of nodes) {
            try {
                statement.run(node.id, node.properties.hash, node.properties.author, node.properties.date, node.properties.message, node.properties.diffSummary || null);
            }
            catch (error) {
                this.logger.warn(`Failed to insert CommitNode: ${node.id}`, { error: (0, error_handling_1.getErrorMessage)(error) });
            }
        }
    }
    /**
     * Inserts TestNode data into SQLite.
     */
    async insertTestNodes(nodes, statement) {
        for (const node of nodes) {
            try {
                statement.run(node.id, node.properties.name, node.properties.filePath, node.properties.startLine, node.properties.endLine, node.properties.framework || null);
            }
            catch (error) {
                this.logger.warn(`Failed to insert TestNode: ${node.id}`, { error: (0, error_handling_1.getErrorMessage)(error) });
            }
        }
    }
    /**
     * Inserts PullRequestNode data into SQLite.
     */
    async insertPullRequestNodes(nodes, statement) {
        for (const node of nodes) {
            try {
                statement.run(node.id, node.properties.prId, node.properties.title, node.properties.author, node.properties.createdAt, node.properties.mergedAt || null, node.properties.url, node.properties.body || null);
            }
            catch (error) {
                this.logger.warn(`Failed to insert PullRequestNode: ${node.id}`, { error: (0, error_handling_1.getErrorMessage)(error) });
            }
        }
    }
    /**
     * Inserts FunctionNode data into SQLite.
     */
    async insertFunctionNodes(nodes, statement) {
        for (const node of nodes) {
            try {
                statement.run(node.id, node.properties.fileId, node.properties.name, node.properties.signature, node.properties.returnType, node.properties.accessLevel, node.properties.filePath, node.properties.startLine, node.properties.endLine, node.properties.body, JSON.stringify(node.properties.calledByMethods || []), JSON.stringify(node.properties.callsMethods || []), node.properties.usesExternalMethods ? 1 : 0, JSON.stringify(node.properties.internalCallGraph || []), node.properties.transitiveCallDepth);
            }
            catch (error) {
                this.logger.warn(`Failed to insert FunctionNode: ${node.id}`, { error: (0, error_handling_1.getErrorMessage)(error) });
            }
        }
    }
    /**
     * Inserts CALLS edges into the function_calls table.
     */
    async insertFunctionCalls(edges, statement) {
        for (const edge of edges) {
            if (edge.type === 'CALLS') {
                try {
                    statement.run(crypto.randomUUID(), edge.source, edge.target);
                }
                catch (error) {
                    this.logger.warn(`Failed to insert CALLS edge: ${edge.source} -> ${edge.target}`, { error: (0, error_handling_1.getErrorMessage)(error) });
                }
            }
        }
    }
    /**
     * Inserts RepositoryNode data into SQLite.
     */
    async insertRepositoryNodes(nodes, statement) {
        for (const node of nodes) {
            try {
                statement.run(node.id, node.properties.repoPath, node.properties.repoName, node.properties.createdAt || new Date().toISOString(), node.properties.lastUpdated || new Date().toISOString());
            }
            catch (error) {
                this.logger.warn(`Failed to insert RepositoryNode: ${node.id}`, { error: (0, error_handling_1.getErrorMessage)(error) });
            }
        }
    }
    /**
     * Synchronous version for use within transactions.
     */
    insertRepositoryNodesSync(nodes, statement) {
        for (const node of nodes) {
            statement.run(node.id, node.properties.repoPath, node.properties.repoName, node.properties.createdAt || new Date().toISOString(), node.properties.lastUpdated || new Date().toISOString());
        }
    }
    /**
     * Synchronous version for use within transactions.
     */
    insertFileNodesSync(nodes, statement) {
        for (const node of nodes) {
            statement.run(node.id, // file_id
            node.properties.repoId || '', // repo_id
            node.properties.filePath, // file_path
            node.properties.fileName, // file_name
            node.properties.fileExtension || null, // file_extension
            node.properties.language || null, // language
            node.properties.sizeKb || null, // size_kb
            node.properties.contentHash || null, // content_hash
            node.properties.fileType || 'source', // file_type
            node.properties.aiSummary || null, // ai_summary
            JSON.stringify(node.properties.imports || []), // imports
            JSON.stringify(node.properties.exports || []), // exports
            new Date().toISOString(), // created_at
            new Date().toISOString() // updated_at
            );
        }
    }
    /**
     * Synchronous version for use within transactions.
     */
    insertCodeNodesSync(nodes, statement) {
        for (const node of nodes) {
            statement.run(node.id, node.properties.name, node.properties.signature || null, node.properties.language, node.properties.filePath, node.properties.startLine, node.properties.endLine);
        }
    }
    /**
     * Synchronous version for use within transactions.
     */
    insertCommitNodesSync(nodes, statement) {
        for (const node of nodes) {
            statement.run(node.id, node.properties.hash, node.properties.author, node.properties.date, node.properties.message, node.properties.diffSummary || null);
        }
    }
    /**
     * Synchronous version for use within transactions.
     */
    insertTestNodesSync(nodes, statement) {
        for (const node of nodes) {
            statement.run(node.id, node.properties.name, node.properties.filePath, node.properties.startLine, node.properties.endLine, node.properties.framework || null);
        }
    }
    /**
     * Synchronous version for use within transactions.
     */
    insertPullRequestNodesSync(nodes, statement) {
        for (const node of nodes) {
            statement.run(node.id, node.properties.prId, node.properties.title, node.properties.author, node.properties.createdAt, node.properties.mergedAt || null, node.properties.url, node.properties.body || null);
        }
    }
    /**
     * Synchronous version for use within transactions.
     */
    insertFunctionNodesSync(nodes, statement) {
        for (const node of nodes) {
            statement.run(node.id, node.properties.fileId, node.properties.name, node.properties.signature, node.properties.returnType, node.properties.accessLevel, node.properties.filePath, node.properties.startLine, node.properties.endLine, node.properties.body, JSON.stringify(node.properties.calledByMethods || []), JSON.stringify(node.properties.callsMethods || []), node.properties.usesExternalMethods ? 1 : 0, JSON.stringify(node.properties.internalCallGraph || []), node.properties.transitiveCallDepth);
        }
    }
    /**
     * Synchronous version for use within transactions.
     */
    insertFunctionCallsSync(edges, statement) {
        for (const edge of edges) {
            if (edge.type === 'CALLS') {
                statement.run(crypto.randomUUID(), edge.source, edge.target);
            }
        }
    }
    /**
     * Main method to load all nodes and edges into the unified persistence layer.
     * @param {NodeWithEmbedding[]} nodes - Array of nodes with embeddings.
     * @param {Edge[]} edges - Array of edges.
     */
    async load(nodes, edges) {
        const operation = this.logger.operation(`Loading ${nodes.length} nodes and ${edges.length} edges to unified SQLite database`);
        const results = {
            sqlite: { success: false },
        };
        try {
            // Perform comprehensive data validation before persistence
            this.logger.debug('Validating data before persistence');
            const validation = this.performDataValidation(nodes, edges);
            if (!validation.valid) {
                this.logger.error('Data validation failed', { errors: validation.errors });
                throw new error_handling_1.DataValidationError('Data validation failed before persistence', validation.errors);
            }
            this.logger.info('Starting unified SQLite data loading', {
                totalNodes: nodes.length,
                totalEdges: edges.length,
                nodeTypes: this.getNodeTypeStats(nodes),
                validationPassed: true
            });
            // Connect to SQLite database
            const connectionStatus = await this.connectToDatabases();
            if (!connectionStatus.sqlite) {
                throw new Error('SQLite database not available for data loading');
            }
            this.logger.info('SQLite database connected and ready for loading');
            // Load data to unified SQLite storage with vectors and graph data
            try {
                // Load nodes with embeddings to SQLite (includes vector storage)
                await this.batchLoadToSQLiteWithVectors(nodes);
                // Load graph relationships to SQLite (enhanced graph storage)
                await this.batchLoadToGraphDB(nodes, edges);
                results.sqlite.success = true;
                this.logger.info('Unified SQLite data loading completed successfully');
            }
            catch (sqliteError) {
                results.sqlite.error = (0, error_handling_1.getErrorMessage)(sqliteError);
                this.logger.error('SQLite data loading failed', { error: (0, error_handling_1.getErrorMessage)(sqliteError) });
                throw sqliteError;
            }
            operation();
            return { success: results.sqlite.success, results };
        }
        catch (error) {
            this.logger.error('Unified SQLite data loading failed', { error: (0, error_handling_1.getErrorMessage)(error) });
            operation();
            throw error;
        }
        finally {
            // Always disconnect from database
            try {
                await this.disconnectFromDatabases();
            }
            catch (disconnectError) {
                this.logger.warn('Failed to disconnect from database', { error: (0, error_handling_1.getErrorMessage)(disconnectError) });
            }
        }
    }
    /**
     * Gets statistics about the nodes by type.
     * @param {NodeWithEmbedding[]} nodes - Array of nodes.
     * @returns {Record<string, number>} Node type statistics.
     */
    getNodeTypeStats(nodes) {
        const stats = {};
        for (const node of nodes) {
            stats[node.type] = (stats[node.type] || 0) + 1;
        }
        return stats;
    }
    /**
     * Attempts to recover from partial failures by retrying failed operations.
     * @param {NodeWithEmbedding[]} nodes - Array of nodes to retry.
     * @param {Edge[]} edges - Array of edges to retry.
     * @param {string[]} failedDatabases - List of databases that failed.
     * @returns {Promise<{success: boolean, results: Record<string, {success: boolean, error?: string}>}>}
     */
    async retryFailedOperations(nodes, edges, failedDatabases) {
        const operation = this.logger.operation(`Retrying failed operations for ${failedDatabases.join(', ')}`);
        const results = {};
        try {
            this.logger.info('Starting retry operations', {
                failedDatabases,
                nodeCount: nodes.length,
                edgeCount: edges.length
            });
            // Connect to databases
            const connectionStatus = await this.connectToDatabases();
            // Retry each failed database
            const retryPromises = [];
            for (const dbName of failedDatabases) {
                switch (dbName) {
                    case 'sqlite':
                        if (connectionStatus.sqlite) {
                            retryPromises.push(this.batchLoadToSQLiteWithVectors(nodes)
                                .then(() => this.batchLoadToGraphDB(nodes, edges))
                                .then(() => {
                                results.sqlite = { success: true };
                                this.logger.info('SQLite retry successful');
                            })
                                .catch((error) => {
                                results.sqlite = { success: false, error: (0, error_handling_1.getErrorMessage)(error) };
                                this.logger.error('SQLite retry failed', { error: (0, error_handling_1.getErrorMessage)(error) });
                            }));
                        }
                        else {
                            results.sqlite = { success: false, error: 'Database not connected' };
                        }
                        break;
                    default:
                        this.logger.warn(`Unknown database for retry: ${dbName}`);
                }
            }
            // Wait for all retry operations
            await Promise.allSettled(retryPromises);
            const successfulRetries = Object.values(results).filter(result => result.success).length;
            const success = successfulRetries > 0;
            this.logger.info('Retry operations completed', {
                successful: successfulRetries,
                total: failedDatabases.length,
                results,
            });
            operation();
            return { success, results };
        }
        catch (error) {
            this.logger.error('Retry operations failed', { error: (0, error_handling_1.getErrorMessage)(error) });
            operation();
            throw error;
        }
        finally {
            await this.disconnectFromDatabases();
        }
    }
    /**
     * Performs a health check on SQLite database and attempts basic recovery.
     * @returns {Promise<{healthy: string[], unhealthy: string[], recovered: string[]}>}
     */
    async performHealthCheck() {
        const operation = this.logger.operation('Performing SQLite database health check');
        const healthy = [];
        const unhealthy = [];
        const recovered = [];
        try {
            this.logger.info('Starting SQLite database health check');
            // Check current connectivity
            const connectivity = await this.verifyDatabaseConnectivity();
            // Categorize databases
            Object.entries(connectivity).forEach(([dbName, isHealthy]) => {
                if (isHealthy) {
                    healthy.push(dbName);
                }
                else {
                    unhealthy.push(dbName);
                }
            });
            // Attempt recovery for unhealthy databases
            if (unhealthy.length > 0) {
                this.logger.info('Attempting recovery for unhealthy databases', { unhealthy });
                for (const dbName of unhealthy) {
                    try {
                        switch (dbName) {
                            case 'sqlite':
                                await this.connectToSQLite();
                                if (this.sqliteClient.isConnectedToDatabase()) {
                                    recovered.push(dbName);
                                    healthy.push(dbName);
                                    unhealthy.splice(unhealthy.indexOf(dbName), 1);
                                }
                                break;
                        }
                    }
                    catch (error) {
                        this.logger.debug(`Recovery failed for ${dbName}`, { error: (0, error_handling_1.getErrorMessage)(error) });
                    }
                }
            }
            this.logger.info('SQLite database health check completed', {
                healthy: healthy.length,
                unhealthy: unhealthy.length,
                recovered: recovered.length,
            });
            operation();
            return { healthy, unhealthy, recovered };
        }
        catch (error) {
            this.logger.error('SQLite database health check failed', { error: (0, error_handling_1.getErrorMessage)(error) });
            operation();
            throw error;
        }
        finally {
            await this.disconnectFromDatabases();
        }
    }
    /**
     * Verifies SQLite database connectivity before attempting operations.
     * @returns {Promise<{sqlite: boolean}>}
     */
    async verifyDatabaseConnectivity() {
        const operation = this.logger.operation('Verifying SQLite database connectivity');
        try {
            const connectivity = {
                sqlite: false,
            };
            // Test SQLite database connection
            try {
                await this.verifySQLiteConnection();
                connectivity.sqlite = true;
            }
            catch (error) {
                this.logger.debug('SQLite connectivity check failed', { error: (0, error_handling_1.getErrorMessage)(error) });
            }
            this.logger.info('SQLite database connectivity verification completed', connectivity);
            operation();
            return connectivity;
        }
        catch (error) {
            this.logger.error('SQLite database connectivity verification failed', { error: (0, error_handling_1.getErrorMessage)(error) });
            operation();
            throw error;
        }
    }
    /**
     * Verifies SQLite connection.
     */
    async verifySQLiteConnection() {
        if (!this.sqliteClient.isConnectedToDatabase()) {
            throw new Error('SQLite not connected');
        }
        // Test with a simple query
        this.sqliteClient.get('SELECT 1 as test');
    }
    /**
     * Implements data consistency checks across databases.
     * @param {NodeWithEmbedding[]} nodes - Array of nodes to verify.
     * @returns {Promise<{consistent: boolean, issues: string[]}>}
     */
    async verifyDataConsistency(nodes) {
        const operation = this.logger.operation('Verifying data consistency across databases');
        const issues = [];
        try {
            this.logger.info('Starting data consistency verification', { nodeCount: nodes.length });
            // Connect to databases for verification
            const connectionStatus = await this.connectToDatabases();
            // Check node counts across databases
            if (connectionStatus.sqlite) {
                try {
                    // Get counts from SQLite
                    const sqliteStats = await this.sqliteClient.getIndexingStats();
                    // TODO: Implement getEnhancedGraphStats method in SQLiteClient
                    // const graphStats = await this.sqliteClient.getEnhancedGraphStats();
                    this.logger.debug('SQLite stats', { sqliteStats });
                    // Add specific consistency checks here
                    // For example, verify that all nodes in regular tables have corresponding entries in graph tables
                }
                catch (error) {
                    issues.push(`Failed to verify SQLite consistency: ${(0, error_handling_1.getErrorMessage)(error)}`);
                }
            }
            // Check for orphaned edges (edges without corresponding nodes)
            if (connectionStatus.sqlite) {
                try {
                    const orphanedEdges = this.sqliteClient.all(`
                        SELECT fc.id, fc.caller_id, fc.callee_id
                        FROM function_calls fc
                        LEFT JOIN functions f1 ON fc.caller_id = f1.id
                        LEFT JOIN functions f2 ON fc.callee_id = f2.id
                        WHERE f1.id IS NULL OR f2.id IS NULL
                        LIMIT 10
                    `);
                    if (orphanedEdges.length > 0) {
                        issues.push(`Found ${orphanedEdges.length} orphaned function call edges in SQLite`);
                    }
                }
                catch (error) {
                    issues.push(`Failed to check for orphaned edges: ${(0, error_handling_1.getErrorMessage)(error)}`);
                }
            }
            const consistent = issues.length === 0;
            this.logger.info('Data consistency verification completed', {
                consistent,
                issueCount: issues.length
            });
            operation();
            return { consistent, issues };
        }
        catch (error) {
            this.logger.error('Data consistency verification failed', { error: (0, error_handling_1.getErrorMessage)(error) });
            operation();
            throw error;
        }
        finally {
            await this.disconnectFromDatabases();
        }
    }
    /**
     * Gets loading statistics and SQLite database health information.
     * @returns {Promise<{databases: Record<string, boolean>, lastLoad: Date | null, connectivity: {sqlite: boolean}}>}
     */
    async getStats() {
        try {
            // Check database connectivity
            const databases = {
                sqlite: this.sqliteClient.isConnectedToDatabase(),
            };
            // Verify actual connectivity
            const connectivity = await this.verifyDatabaseConnectivity();
            // Get last load timestamp from SQLite if available
            let lastLoad = null;
            if (connectivity.sqlite) {
                try {
                    await this.connectToSQLite();
                    const lastIndexed = this.sqliteClient.getLastIndexedCommit();
                    if (lastIndexed) {
                        // This is a commit hash, not a timestamp. We'd need to store actual load timestamps
                        // For now, return null
                        lastLoad = null;
                    }
                    this.sqliteClient.disconnect();
                }
                catch (error) {
                    this.logger.debug('Failed to get last load timestamp', { error: (0, error_handling_1.getErrorMessage)(error) });
                }
            }
            return {
                databases,
                lastLoad,
                connectivity,
            };
        }
        catch (error) {
            this.logger.error('Failed to get DataLoader stats', { error: (0, error_handling_1.getErrorMessage)(error) });
            throw error;
        }
    }
    /**
     * Validates nodes before persistence to ensure data integrity.
     * @param nodes - Array of nodes to validate
     * @returns Validation result with errors if any
     */
    validateNodes(nodes) {
        const errors = [];
        if (!Array.isArray(nodes)) {
            errors.push('Nodes must be an array');
            return { valid: false, errors };
        }
        for (const [index, node] of nodes.entries()) {
            if (!node.id || typeof node.id !== 'string' || node.id.trim() === '') {
                errors.push(`Node ${index}: ID is required and must be a non-empty string`);
            }
            if (!node.type || typeof node.type !== 'string' || node.type.trim() === '') {
                errors.push(`Node ${index}: type is required and must be a non-empty string`);
            }
            if (!node.properties || typeof node.properties !== 'object') {
                errors.push(`Node ${index}: properties is required and must be an object`);
            }
            if (!node.embedding || !Array.isArray(node.embedding)) {
                errors.push(`Node ${index}: embedding is required and must be an array`);
            }
            else if (node.embedding.length > 0 && !node.embedding.every(val => typeof val === 'number' && !isNaN(val))) {
                errors.push(`Node ${index}: embedding must contain only valid numbers`);
            }
            // Note: Empty embedding arrays are now allowed (for skipEmbeddings mode)
        }
        return { valid: errors.length === 0, errors };
    }
    /**
     * Validates edges before persistence to ensure data integrity.
     * @param edges - Array of edges to validate
     * @returns Validation result with errors if any
     */
    validateEdges(edges) {
        const errors = [];
        if (!Array.isArray(edges)) {
            errors.push('Edges must be an array');
            return { valid: false, errors };
        }
        for (const [index, edge] of edges.entries()) {
            if (!edge.source || typeof edge.source !== 'string' || edge.source.trim() === '') {
                errors.push(`Edge ${index}: source is required and must be a non-empty string`);
            }
            if (!edge.target || typeof edge.target !== 'string' || edge.target.trim() === '') {
                errors.push(`Edge ${index}: target is required and must be a non-empty string`);
            }
            if (!edge.type || typeof edge.type !== 'string' || edge.type.trim() === '') {
                errors.push(`Edge ${index}: type is required and must be a non-empty string`);
            }
            if (edge.properties && typeof edge.properties !== 'object') {
                errors.push(`Edge ${index}: properties must be an object if provided`);
            }
        }
        return { valid: errors.length === 0, errors };
    }
    /**
     * Validates data consistency across nodes and edges.
     * @param nodes - Array of nodes
     * @param edges - Array of edges
     * @returns Validation result with errors if any
     */
    validateDataConsistency(nodes, edges) {
        const errors = [];
        const nodeIds = new Set(nodes.map(node => node.id));
        // Check that all edge sources and targets reference existing nodes
        for (const [index, edge] of edges.entries()) {
            if (!nodeIds.has(edge.source)) {
                errors.push(`Edge ${index}: source node '${edge.source}' does not exist in the provided nodes`);
            }
            if (!nodeIds.has(edge.target)) {
                errors.push(`Edge ${index}: target node '${edge.target}' does not exist in the provided nodes`);
            }
        }
        // Check for duplicate node IDs
        const duplicateIds = nodes
            .map(node => node.id)
            .filter((id, index, arr) => arr.indexOf(id) !== index);
        if (duplicateIds.length > 0) {
            errors.push(`Duplicate node IDs found: ${[...new Set(duplicateIds)].join(', ')}`);
        }
        return { valid: errors.length === 0, errors };
    }
    /**
     * Performs comprehensive data validation before persistence.
     * @param nodes - Array of nodes to validate
     * @param edges - Array of edges to validate
     * @returns Validation result with all errors
     */
    performDataValidation(nodes, edges) {
        const allErrors = [];
        // Validate nodes
        const nodeValidation = this.validateNodes(nodes);
        allErrors.push(...nodeValidation.errors);
        // Validate edges
        const edgeValidation = this.validateEdges(edges);
        allErrors.push(...edgeValidation.errors);
        // Validate data consistency
        if (nodeValidation.valid && edgeValidation.valid) {
            const consistencyValidation = this.validateDataConsistency(nodes, edges);
            allErrors.push(...consistencyValidation.errors);
        }
        return { valid: allErrors.length === 0, errors: allErrors };
    }
}
exports.DataLoader = DataLoader;
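
For orientation, a minimal usage sketch of the class above. The constructor signature, load(), and its { success, results } return value come directly from the source; the require path and the placeholder config value are assumptions made for illustration.

// Hedged usage sketch; not from the package's documentation.
const { DataLoader } = require('hikma-engine'); // assumed entry point; adjust to the actual module path
async function main() {
    // The second constructor argument is a ConfigManager in the source; it is
    // stored but not otherwise used in this file, so a placeholder is passed here.
    const loader = new DataLoader('./hikma-metadata.db', undefined);
    const nodes = [{
        id: 'file-1',
        type: 'FileNode',
        properties: { repoId: 'repo-1', filePath: 'src/index.ts', fileName: 'index.ts' },
        embedding: [], // empty embeddings are allowed (skipEmbeddings mode)
    }];
    const edges = []; // e.g. { source: 'fn-a', target: 'fn-b', type: 'CALLS' }
    const { success, results } = await loader.load(nodes, edges);
    console.log({ success, results });
}
main().catch(console.error);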