hikma-engine
Version:
Code Knowledge Graph Indexer - A sophisticated TypeScript-based indexer that transforms Git repositories into multi-dimensional knowledge stores for AI agents
286 lines (285 loc) • 11.5 kB
JavaScript
"use strict";
/**
 * TypeScript interop helper: exposes property `k` of module `m` on object `o`
 * under the name `k2` (defaults to `k`).
 *
 * When `Object.create` exists, the property is defined through a descriptor:
 * the source descriptor is reused only if it is an accessor on an
 * `__esModule` object, or a data property that is neither writable nor
 * configurable; otherwise an enumerable getter reading `m[k]` is installed so
 * the binding stays live. Without `Object.create`, the value is copied.
 */
var __createBinding = (this && this.__createBinding) || (function () {
    if (!Object.create) {
        // Legacy engines: plain snapshot copy.
        return function (o, m, k, k2) {
            var legacyName = k2 === undefined ? k : k2;
            o[legacyName] = m[k];
        };
    }
    return function (o, m, k, k2) {
        var exposedName = k2 === undefined ? k : k2;
        var descriptor = Object.getOwnPropertyDescriptor(m, k);
        var canReuse = descriptor && ("get" in descriptor
            ? m.__esModule
            : !(descriptor.writable || descriptor.configurable));
        if (!canReuse) {
            descriptor = { enumerable: true, get: function () { return m[k]; } };
        }
        Object.defineProperty(o, exposedName, descriptor);
    };
})();
/**
 * TypeScript interop helper: stores `v` as the `default` property of `o`.
 * With `Object.create` available the property is defined as enumerable (and,
 * by `defineProperty` defaults, read-only); otherwise it is plainly assigned.
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
    if (Object.create) {
        return function (o, v) {
            var defaultDescriptor = { enumerable: true, value: v };
            Object.defineProperty(o, "default", defaultDescriptor);
        };
    }
    return function (o, v) {
        o["default"] = v;
    };
})();
/**
 * TypeScript interop helper emulating `import * as ns`.
 *
 * Objects flagged `__esModule` are returned untouched. Anything else is
 * wrapped in a fresh object: every own property except "default" is bound
 * through `__createBinding`, and the original value is attached as `default`
 * through `__setModuleDefault`.
 */
var __importStar = (this && this.__importStar) || (function () {
    // Resolved on first use and cached, mirroring the lazy lookup of the stock helper.
    var cachedKeyLister = null;
    var listOwnKeys = function (o) {
        if (cachedKeyLister === null) {
            cachedKeyLister = Object.getOwnPropertyNames || function (target) {
                var names = [];
                for (var name in target) {
                    if (Object.prototype.hasOwnProperty.call(target, name)) {
                        names[names.length] = name;
                    }
                }
                return names;
            };
        }
        return cachedKeyLister(o);
    };
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        var namespace = {};
        if (mod != null) {
            var keys = listOwnKeys(mod);
            for (var idx = 0; idx < keys.length; idx++) {
                if (keys[idx] !== "default") {
                    __createBinding(namespace, mod, keys[idx]);
                }
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
Object.defineProperty(exports, "__esModule", { value: true });
exports.getCodeEmbedding = getCodeEmbedding;
exports.getPythonQueryEmbedding = getPythonQueryEmbedding;
exports.getPythonDocumentEmbedding = getPythonDocumentEmbedding;
exports.getDetailedEmbedding = getDetailedEmbedding;
exports.shutdownPythonEmbedding = shutdownPythonEmbedding;
const child_process_1 = require("child_process");
const path = __importStar(require("path"));
const logger_1 = require("../utils/logger");
const python_dependency_checker_1 = require("../utils/python-dependency-checker");
// Singleton persistent Python process
/**
 * Owns one long-lived `python3 embed_server.py` child process and speaks a
 * newline-delimited JSON protocol with it over stdin/stdout:
 *
 *   -> {"id": <number>, "text": <string>, "is_query": <boolean>}
 *   <- {"type": "ready"}
 *   <- {"type": "result", "id": <number>, "embedding": [...]} or {..., "error": "..."}
 *
 * The child is started lazily on the first request. If it dies, every
 * in-flight request (and any start-up still waiting for "ready") is rejected
 * immediately and the next request starts a fresh child.
 */
class PersistentPythonEmbedding {
    constructor() {
        this.process = null;
        this.isReady = false;
        this.requestId = 0;
        // id -> { resolve, reject, timeout } for requests awaiting a response.
        this.pendingRequests = new Map();
        this.logger = (0, logger_1.getLogger)('PersistentPythonEmbedding');
        this.initializationPromise = null;
        // { resolve, reject } of the start-up currently waiting for "ready", if any.
        this.readyWaiter = null;
    }
    /**
     * Ensures the Python process is running and ready. Concurrent callers share
     * one start-up attempt; a failed attempt is forgotten so a later call retries.
     * @returns {Promise<void>}
     */
    async initialize() {
        if (this.process && this.isReady) {
            return;
        }
        if (this.initializationPromise) {
            return this.initializationPromise;
        }
        const attempt = this.doInitialize().catch((error) => {
            // Do not cache a rejected promise, otherwise every later call would
            // fail with the same stale error instead of retrying.
            if (this.initializationPromise === attempt) {
                this.initializationPromise = null;
            }
            throw error;
        });
        this.initializationPromise = attempt;
        return attempt;
    }
    /**
     * Performs one start-up attempt: checks Python dependencies, reads the model
     * name from config, spawns the server and waits for its "ready" message.
     * The child is killed if start-up fails so it is not leaked.
     * @returns {Promise<void>}
     */
    async doInitialize() {
        if (this.process && this.isReady) {
            return;
        }
        this.logger.info('Starting persistent Python embedding process...');
        // Check Python dependencies before starting
        await (0, python_dependency_checker_1.ensurePythonDependencies)(false, false);
        // Get model name from config
        const { ConfigManager } = await Promise.resolve().then(() => __importStar(require('../config')));
        const config = new ConfigManager(process.cwd());
        const modelName = config.getConfig().ai.embedding.model;
        this.logger.info('Using Python embedding model', { model: modelName });
        // Python script is in src directory, not dist
        const projectRoot = path.resolve(__dirname, '..', '..');
        const scriptPath = path.join(projectRoot, 'src', 'python', 'embed_server.py');
        const workingDir = projectRoot;
        this.logger.info('Python script details', { scriptPath, workingDir, modelName });
        const child = (0, child_process_1.spawn)('python3', [
            scriptPath,
            modelName // Pass model name as argument
        ], {
            cwd: workingDir,
            stdio: ['pipe', 'pipe', 'pipe']
        });
        this.process = child;
        this.isReady = false;
        // Attach lifecycle listeners before anything can throw so a late spawn
        // 'error' event never goes unhandled.
        this.watchProcessLifecycle(child);
        try {
            if (!child.stdout || !child.stderr || !child.stdin) {
                throw new Error('Failed to create Python process with proper stdio');
            }
            this.wireStdio(child);
            await this.waitForReady();
        }
        catch (error) {
            if (this.process === child) {
                this.process = null;
                this.isReady = false;
            }
            child.kill();
            throw error;
        }
    }
    /**
     * Registers 'exit' and 'error' listeners that tear down state for `child`.
     * @param {object} child - ChildProcess returned by spawn.
     */
    watchProcessLifecycle(child) {
        child.on('exit', (code) => {
            this.logger.warn('Python process exited', { code });
            this.releaseProcess(child, new Error(`Python process exited with code ${code}`));
        });
        child.on('error', (error) => {
            this.logger.error('Python process error', { error: error.message });
            if (this.releaseProcess(child, new Error(`Python process error: ${error.message}`))) {
                // The child may still be alive after a non-spawn error; do not orphan it.
                child.kill();
            }
        });
    }
    /**
     * Hooks up stdin error logging, stdout line parsing and stderr logging.
     * @param {object} child - ChildProcess whose three stdio pipes exist.
     */
    wireStdio(child) {
        // Handle stdin errors (like EPIPE); the affected request is failed by
        // the write callback in generateEmbedding, so only log here.
        child.stdin.on('error', (error) => {
            this.logger.warn('Python process stdin error', { error: error.message });
        });
        // Decode at the stream level so a multi-byte UTF-8 character split
        // across two chunks is not corrupted.
        child.stdout.setEncoding('utf8');
        child.stderr.setEncoding('utf8');
        let buffer = '';
        child.stdout.on('data', (chunk) => {
            const pieces = (buffer + chunk).split('\n');
            // The last piece is an incomplete line (or '') kept for the next chunk.
            buffer = pieces.pop();
            for (const piece of pieces) {
                const line = piece.trim();
                // Ignore output of a child that has already been replaced.
                if (line && this.process === child) {
                    this.handleResponse(line);
                }
            }
        });
        child.stderr.on('data', (chunk) => {
            const errorText = String(chunk).trim();
            if (!errorText) {
                return;
            }
            this.logger.warn('Python process stderr', { message: errorText });
            if (errorText.includes('ERROR') || errorText.includes('Exception') || errorText.includes('Traceback')) {
                this.logger.error('Python process error detected', { error: errorText });
            }
        });
    }
    /**
     * Clears all state tied to `child` and fails everything waiting on it.
     * Does nothing when `child` is no longer the current process, so events
     * from an old child cannot disturb a newer one.
     * @param {object} child - Process the event belongs to.
     * @param {Error} error - Reason handed to waiting callers.
     * @returns {boolean} true when `child` was the current process.
     */
    releaseProcess(child, error) {
        if (this.process !== child) {
            return false;
        }
        this.process = null;
        this.isReady = false;
        this.initializationPromise = null;
        if (this.readyWaiter) {
            this.readyWaiter.reject(error);
        }
        this.rejectAllPending(error);
        return true;
    }
    /**
     * Resolves once the server reports "ready"; rejects on timeout or as soon
     * as the process is released (exit, error, shutdown).
     * @param {number} [timeoutMs=600000] - 10 minutes by default, to allow for
     *   model download and initial setup.
     * @returns {Promise<void>}
     */
    waitForReady(timeoutMs = 600000) {
        if (this.isReady) {
            return Promise.resolve();
        }
        return new Promise((resolve, reject) => {
            const settle = (finish, value) => {
                clearTimeout(timer);
                if (this.readyWaiter === waiter) {
                    this.readyWaiter = null;
                }
                finish(value);
            };
            const waiter = {
                resolve: () => settle(resolve),
                reject: (error) => settle(reject, error)
            };
            const timer = setTimeout(() => {
                waiter.reject(new Error('Python process initialization timeout'));
            }, timeoutMs);
            this.readyWaiter = waiter;
        });
    }
    /**
     * Handles one JSON line from the server's stdout.
     * @param {string} line - Trimmed, non-empty line.
     */
    handleResponse(line) {
        try {
            const response = JSON.parse(line);
            if (response.type === 'ready') {
                this.isReady = true;
                this.logger.info('Python embedding process ready');
                if (this.readyWaiter) {
                    this.readyWaiter.resolve();
                }
                return;
            }
            if (response.type !== 'result' || typeof response.id !== 'number') {
                return;
            }
            const pending = this.pendingRequests.get(response.id);
            if (!pending) {
                // Already timed out or otherwise settled.
                return;
            }
            clearTimeout(pending.timeout);
            this.pendingRequests.delete(response.id);
            if (response.error) {
                pending.reject(new Error(`Python embedding error: ${response.error}`));
            }
            else if (Array.isArray(response.embedding)) {
                pending.resolve(response.embedding);
            }
            else {
                pending.reject(new Error('Invalid response format from Python'));
            }
        }
        catch (error) {
            this.logger.error('Failed to parse Python response', { line, error });
        }
    }
    /**
     * Rejects every in-flight request with `error` and clears the table.
     * @param {Error} error
     */
    rejectAllPending(error) {
        for (const pending of this.pendingRequests.values()) {
            clearTimeout(pending.timeout);
            pending.reject(error);
        }
        this.pendingRequests.clear();
    }
    /**
     * Sends one embedding request, starting the server first if needed.
     * @param {string} text - Text to embed.
     * @param {boolean} [isQuery=false] - Forwarded to the server as `is_query`.
     * @param {number} [timeoutMs=60000] - Per-request timeout (1 minute by default).
     * @returns {Promise<number[]>} The `embedding` array from the server response.
     * @throws {Error} On start-up failure, write failure, server-reported error,
     *   malformed response, process death or timeout.
     */
    async generateEmbedding(text, isQuery = false, timeoutMs = 60000) {
        if (!this.process || !this.isReady) {
            await this.initialize();
        }
        const child = this.process;
        if (!child || !child.stdin) {
            throw new Error('Failed to send request to Python: process is not running');
        }
        return new Promise((resolve, reject) => {
            const id = ++this.requestId;
            const timeout = setTimeout(() => {
                this.pendingRequests.delete(id);
                reject(new Error('Python embedding request timeout'));
            }, timeoutMs);
            this.pendingRequests.set(id, { resolve, reject, timeout });
            const failWrite = (error) => {
                if (!this.pendingRequests.has(id)) {
                    return; // already settled elsewhere
                }
                clearTimeout(timeout);
                this.pendingRequests.delete(id);
                const errorMessage = error instanceof Error ? error.message : String(error);
                this.logger.error('Failed to write to Python process', { error: errorMessage });
                reject(new Error(`Failed to send request to Python: ${errorMessage}`));
            };
            const request = {
                id,
                text,
                is_query: isQuery
            };
            try {
                // The callback reports asynchronous failures (e.g. EPIPE) so the
                // request fails right away instead of waiting for its timeout.
                const flushed = child.stdin.write(JSON.stringify(request) + '\n', (error) => {
                    if (error) {
                        failWrite(error);
                    }
                });
                if (!flushed) {
                    // Node has still queued the data; this is only a back-pressure signal.
                    this.logger.warn('Python stdin buffer full, waiting for drain');
                }
            }
            catch (error) {
                failWrite(error);
            }
        });
    }
    /**
     * Kills the Python process (if any) and rejects everything waiting on it.
     * The kill is issued synchronously so this also works from a process
     * 'exit' handler.
     * @returns {Promise<void>}
     */
    async shutdown() {
        const child = this.process;
        if (!child) {
            return;
        }
        this.process = null;
        this.isReady = false;
        this.initializationPromise = null;
        child.kill();
        // The child's own 'exit' event is now stale and ignored, so waiting
        // callers must be failed here.
        const reason = new Error('Python embedding process was shut down');
        if (this.readyWaiter) {
            this.readyWaiter.reject(reason);
        }
        this.rejectAllPending(reason);
    }
}
// Module-wide instance shared by every exported helper below.
const persistentPython = new PersistentPythonEmbedding();
// Best-effort cleanup when the Node process exits; failures are deliberately ignored.
process.on('exit', () => {
    persistentPython.shutdown().catch(() => {
        // Ignore cleanup errors on exit
    });
});
// On SIGINT and SIGTERM: stop the Python process, then exit with status 0.
for (const signalName of ['SIGINT', 'SIGTERM']) {
    process.on(signalName, async () => {
        await persistentPython.shutdown();
        process.exit(0);
    });
}
/**
 * Generate an embedding for `text` using the shared persistent Python process.
 * @param {string} text - Text to embed.
 * @param {boolean} [isQuery=false] - Forwarded to the Python side as `is_query`.
 * @returns {Promise<number[]>} Embedding array returned by the Python process.
 */
async function getCodeEmbedding(text, isQuery = false) {
    const embedding = await persistentPython.generateEmbedding(text, isQuery);
    return embedding;
}
/**
 * Generate an embedding for a search query.
 * Delegates to getCodeEmbedding with the query flag set (the Python side is
 * expected to apply its query prompt in that mode).
 * @param {string} query - Query text to embed.
 * @returns {Promise<number[]>}
 */
async function getPythonQueryEmbedding(query) {
    const treatAsQuery = true;
    return getCodeEmbedding(query, treatAsQuery);
}
/**
 * Generate an embedding for document content.
 * Delegates to getCodeEmbedding with the query flag cleared (no query prompt).
 * @param {string} text - Document text to embed.
 * @returns {Promise<number[]>}
 */
async function getPythonDocumentEmbedding(text) {
    const treatAsQuery = false;
    return getCodeEmbedding(text, treatAsQuery);
}
/**
 * Generate an embedding and return it together with metadata.
 * @param {string} text - Text to embed.
 * @param {boolean} [isQuery=false] - Forwarded to the Python side as `is_query`.
 * @returns {Promise<{embedding: number[], dimensions: number, model: string}>}
 *   The vector, its length, and the model name read from the `../config`
 *   module's `ai.embedding.model` setting for the current working directory.
 */
async function getDetailedEmbedding(text, isQuery = false) {
    const vector = await persistentPython.generateEmbedding(text, isQuery);
    // The config module is loaded lazily, after the embedding has been produced.
    const configModule = await Promise.resolve().then(() => __importStar(require('../config')));
    const manager = new configModule.ConfigManager(process.cwd());
    const { model } = manager.getConfig().ai.embedding;
    return {
        embedding: vector,
        dimensions: vector.length,
        model
    };
}
/**
 * Stop the shared persistent Python process, if one is running.
 * Intended for explicit cleanup, for example at the end of a run.
 * @returns {Promise<void>}
 */
async function shutdownPythonEmbedding() {
    const embeddingService = persistentPython;
    await embeddingService.shutdown();
}