UNPKG

hikma-engine

Version:

Code Knowledge Graph Indexer - A sophisticated TypeScript-based indexer that transforms Git repositories into multi-dimensional knowledge stores for AI agents

286 lines (285 loc) 11.5 kB
"use strict"; var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { if (k2 === undefined) k2 = k; var desc = Object.getOwnPropertyDescriptor(m, k); if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { desc = { enumerable: true, get: function() { return m[k]; } }; } Object.defineProperty(o, k2, desc); }) : (function(o, m, k, k2) { if (k2 === undefined) k2 = k; o[k2] = m[k]; })); var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { Object.defineProperty(o, "default", { enumerable: true, value: v }); }) : function(o, v) { o["default"] = v; }); var __importStar = (this && this.__importStar) || (function () { var ownKeys = function(o) { ownKeys = Object.getOwnPropertyNames || function (o) { var ar = []; for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k; return ar; }; return ownKeys(o); }; return function (mod) { if (mod && mod.__esModule) return mod; var result = {}; if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]); __setModuleDefault(result, mod); return result; }; })(); Object.defineProperty(exports, "__esModule", { value: true }); exports.getCodeEmbedding = getCodeEmbedding; exports.getPythonQueryEmbedding = getPythonQueryEmbedding; exports.getPythonDocumentEmbedding = getPythonDocumentEmbedding; exports.getDetailedEmbedding = getDetailedEmbedding; exports.shutdownPythonEmbedding = shutdownPythonEmbedding; const child_process_1 = require("child_process"); const path = __importStar(require("path")); const logger_1 = require("../utils/logger"); const python_dependency_checker_1 = require("../utils/python-dependency-checker"); // Singleton persistent Python process class PersistentPythonEmbedding { constructor() { this.process = null; this.isReady = false; this.requestId = 0; this.pendingRequests = new Map(); this.logger = (0, logger_1.getLogger)('PersistentPythonEmbedding'); this.initializationPromise = null; } async initialize() { // If already initialized, return immediately if (this.process && this.isReady) return; // If initialization is in progress, wait for it if (this.initializationPromise) { return this.initializationPromise; } // Start initialization this.initializationPromise = this.doInitialize(); return this.initializationPromise; } async doInitialize() { if (this.process && this.isReady) return; this.logger.info('Starting persistent Python embedding process...'); // Check Python dependencies before starting await (0, python_dependency_checker_1.ensurePythonDependencies)(false, false); // Get model name from config const { ConfigManager } = await Promise.resolve().then(() => __importStar(require('../config'))); const config = new ConfigManager(process.cwd()); const aiConfig = config.getConfig().ai; const modelName = aiConfig.embedding.model; this.logger.info('Using Python embedding model', { model: modelName }); // Python script is in src directory, not dist const projectRoot = path.resolve(__dirname, '..', '..'); const scriptPath = path.join(projectRoot, 'src', 'python', 'embed_server.py'); const workingDir = projectRoot; this.logger.info('Python script details', { scriptPath, workingDir, modelName }); this.process = (0, child_process_1.spawn)('python3', [ scriptPath, modelName // Pass model name as argument ], { cwd: workingDir, stdio: ['pipe', 'pipe', 'pipe'] }); if (!this.process.stdout || !this.process.stderr || !this.process.stdin) { throw new Error('Failed to create Python process with proper stdio'); } // Handle stdin errors (like EPIPE) this.process.stdin.on('error', (error) => { this.logger.warn('Python process stdin error', { error: error.message }); // Don't throw here, just log the error }); // Handle process exit this.process.on('exit', (code) => { this.logger.warn('Python process exited', { code }); this.isReady = false; this.initializationPromise = null; // Reset initialization promise this.rejectAllPending(new Error(`Python process exited with code ${code}`)); }); // Handle process errors this.process.on('error', (error) => { this.logger.error('Python process error', { error: error.message }); this.isReady = false; this.initializationPromise = null; // Reset initialization promise this.rejectAllPending(new Error(`Python process error: ${error.message}`)); }); // Handle stdout data let buffer = ''; this.process.stdout.on('data', (data) => { buffer += data.toString(); // Process complete JSON lines while (buffer.includes('\n')) { const lineEnd = buffer.indexOf('\n'); const line = buffer.slice(0, lineEnd).trim(); buffer = buffer.slice(lineEnd + 1); if (line) { this.handleResponse(line); } } }); // Handle stderr this.process.stderr.on('data', (data) => { const errorText = data.toString().trim(); if (errorText) { this.logger.warn('Python process stderr', { message: errorText }); // If we see any stderr output, it might indicate an issue if (errorText.includes('ERROR') || errorText.includes('Exception') || errorText.includes('Traceback')) { this.logger.error('Python process error detected', { error: errorText }); } } }); // Wait for initialization await this.waitForReady(); } async waitForReady() { return new Promise((resolve, reject) => { const timeout = setTimeout(() => { reject(new Error('Python process initialization timeout')); }, 600000); // 10 minutes timeout for model download and initial setup const checkReady = () => { if (this.isReady) { clearTimeout(timeout); resolve(); } else { setTimeout(checkReady, 100); } }; checkReady(); }); } handleResponse(line) { try { const response = JSON.parse(line); if (response.type === 'ready') { this.isReady = true; this.logger.info('Python embedding process ready'); return; } if (response.type === 'result' && typeof response.id === 'number') { const pending = this.pendingRequests.get(response.id); if (pending) { clearTimeout(pending.timeout); this.pendingRequests.delete(response.id); if (response.error) { pending.reject(new Error(`Python embedding error: ${response.error}`)); } else if (response.embedding && Array.isArray(response.embedding)) { pending.resolve(response.embedding); } else { pending.reject(new Error('Invalid response format from Python')); } } } } catch (error) { this.logger.error('Failed to parse Python response', { line, error }); } } rejectAllPending(error) { for (const [id, pending] of this.pendingRequests.entries()) { clearTimeout(pending.timeout); pending.reject(error); } this.pendingRequests.clear(); } async generateEmbedding(text, isQuery = false) { if (!this.process || !this.isReady) { await this.initialize(); } return new Promise((resolve, reject) => { const id = ++this.requestId; const timeout = setTimeout(() => { this.pendingRequests.delete(id); reject(new Error('Python embedding request timeout')); }, 60000); // 1 minute timeout per embedding request this.pendingRequests.set(id, { resolve, reject, timeout }); const request = { id, text, is_query: isQuery }; try { const success = this.process.stdin.write(JSON.stringify(request) + '\n'); if (!success) { this.logger.warn('Python stdin buffer full, waiting for drain'); } } catch (error) { clearTimeout(timeout); this.pendingRequests.delete(id); const errorMessage = error instanceof Error ? error.message : String(error); this.logger.error('Failed to write to Python process', { error: errorMessage }); reject(new Error(`Failed to send request to Python: ${errorMessage}`)); } }); } async shutdown() { if (this.process) { this.process.kill(); this.process = null; this.isReady = false; this.initializationPromise = null; } } } // Global singleton instance const persistentPython = new PersistentPythonEmbedding(); // Cleanup on process exit process.on('exit', () => { persistentPython.shutdown().catch(() => { // Ignore cleanup errors on exit }); }); process.on('SIGINT', async () => { await persistentPython.shutdown(); process.exit(0); }); process.on('SIGTERM', async () => { await persistentPython.shutdown(); process.exit(0); }); async function getCodeEmbedding(text, isQuery = false) { return await persistentPython.generateEmbedding(text, isQuery); } /** * Generate embedding for a query (with prompt) */ async function getPythonQueryEmbedding(query) { return getCodeEmbedding(query, true); } /** * Generate embedding for document content (without prompt) */ async function getPythonDocumentEmbedding(text) { return getCodeEmbedding(text, false); } /** * Get detailed embedding result with metadata */ async function getDetailedEmbedding(text, isQuery = false) { const embedding = await persistentPython.generateEmbedding(text, isQuery); // Get the actual model name from config const { ConfigManager } = await Promise.resolve().then(() => __importStar(require('../config'))); const config = new ConfigManager(process.cwd()); const modelName = config.getConfig().ai.embedding.model; return { embedding, dimensions: embedding.length, model: modelName }; } /** * Shutdown the persistent Python process (useful for cleanup) */ async function shutdownPythonEmbedding() { await persistentPython.shutdown(); }