UNPKG

hikma-engine

Version:

Code Knowledge Graph Indexer - A sophisticated TypeScript-based indexer that transforms Git repositories into multi-dimensional knowledge stores for AI agents

133 lines 5.46 kB
/**
 * @file Responsible for generating vector embeddings for various node types.
 */
import { BaseNode, NodeWithEmbedding } from '../types';
import { ConfigManager } from '../config';
/**
 * Service that generates vector embeddings for nodes and for free-text
 * queries, and exposes helpers for comparing embeddings.
 *
 * The private members declared below indicate several embedding backends
 * (transformers.js, a Python script, an LM Studio server, and an
 * OpenAI-compatible API). How one is chosen is not visible in this
 * declaration file -- presumably driven by the configuration; verify against
 * the implementation.
 *
 * Private members appear here without types or signatures because
 * declaration output omits them; their comments are kept for reference.
 */
export declare class EmbeddingService {
    /** Presumably the ConfigManager passed to the constructor -- confirm in the implementation. */
    private config;
    /** Logger used by the service; its type is not visible in this declaration. */
    private logger;
    /** Presumably the embedding model loaded by loadModel() -- confirm in the implementation. */
    private model;
    /**
     * Initializes the Embedding Service.
     * @param {ConfigManager} config - Configuration manager instance.
     */
    constructor(config: ConfigManager);
    /**
     * Loads the pre-trained embedding model.
     * @returns {Promise<void>} Resolves with no value.
     */
    loadModel(): Promise<void>;
    /**
     * Extracts meaningful text content from a node for embedding generation.
     * @param {BaseNode} node - The node to extract text from.
     * @returns {string} The extracted text content.
     */
    private getTextForNode;
    /**
     * Generates a vector embedding for a given text.
     * @param {string} text - The text to embed.
     * @param {boolean} isQuery - Whether this text is a search query (requires special prompt for some models).
     * @returns {Promise<number[]>} The generated embedding vector.
     */
    private generateEmbedding;
    /**
     * Undocumented in the original source.
     * NOTE(review): the name suggests a simple hash helper; its inputs,
     * output, and callers are not visible in this declaration -- confirm
     * against the implementation before relying on it.
     */
    private simpleHash;
    /**
     * Normalizes user-provided OpenAI-compatible API URL to a base URL.
     * Accepts inputs like base or full paths and returns a clean base such as http://localhost:11434
     */
    private normalizeOpenAIBaseUrl;
    /**
     * Tests connection to LM Studio server.
     */
    private testLMStudioConnection;
    /**
     * Generates embedding using transformers.js pipeline.
     * @param {string} text - The text to generate an embedding for.
     * @param {boolean} isQuery - Whether this text is a search query.
     * @returns {Promise<number[]>} The generated embedding vector.
     */
    private generateTransformersEmbedding;
    /**
     * Generates embedding using Python script.
     * @param {string} text - The text to generate an embedding for.
     * @param {boolean} isQuery - Whether this text is a search query.
     * @returns {Promise<number[]>} The generated embedding vector.
     */
    private generatePythonEmbedding;
    /**
     * Generates embedding using LM Studio server.
     * @param {string} text - The text to generate an embedding for.
     * @returns {Promise<number[]>} The generated embedding vector.
     */
    private generateLMStudioEmbedding;
    /**
     * Generates embedding using OpenAI-compatible API (like Ollama).
     * @param {string} text - The text to generate an embedding for.
     * @param {boolean} isQuery - Whether this is a query embedding.
     * @returns {Promise<number[]>} The generated embedding vector.
     */
    private generateOpenAIEmbedding;
    /**
     * Processes nodes in batches to generate embeddings efficiently.
     * @param {BaseNode[]} nodes - Array of nodes to embed.
     * @returns {Promise<NodeWithEmbedding[]>} Array of nodes with embeddings.
     */
    private processBatch;
    /**
     * Generates embeddings for all provided nodes.
     * @param {BaseNode[]} nodes - Array of nodes to generate embeddings for.
     * @returns {Promise<NodeWithEmbedding[]>} Array of nodes with embeddings attached.
     */
    embedNodes(nodes: BaseNode[]): Promise<NodeWithEmbedding[]>;
    /**
     * Generates an embedding for a single text query (useful for search).
     * NOTE(review): generateQueryEmbedding below has the same signature; how
     * the two differ is not visible in this declaration -- confirm which one
     * callers should prefer.
     * @param {string} query - The query text to embed.
     * @returns {Promise<number[]>} The embedding vector for the query.
     */
    embedQuery(query: string): Promise<number[]>;
    /**
     * Calculates cosine similarity between two embedding vectors.
     * NOTE(review): handling of vectors with different lengths or zero
     * magnitude is not visible here -- confirm in the implementation.
     * @param {number[]} embedding1 - First embedding vector.
     * @param {number[]} embedding2 - Second embedding vector.
     * @returns {number} Cosine similarity score between -1 and 1.
     */
    calculateSimilarity(embedding1: number[], embedding2: number[]): number;
    /**
     * Finds the most similar nodes to a query embedding.
     * @param {number[]} queryEmbedding - The query embedding vector.
     * @param {NodeWithEmbedding[]} nodes - Array of nodes with embeddings.
     * @param {number} [topK] - Number of top similar nodes to return. Optional; the
     *   default applied when omitted is not visible in this declaration.
     * @returns {Array<{node: NodeWithEmbedding, similarity: number}>} Top similar nodes with similarity scores.
     */
    findSimilarNodes(queryEmbedding: number[], nodes: NodeWithEmbedding[], topK?: number): Array<{
        node: NodeWithEmbedding;
        similarity: number;
    }>;
    /**
     * Gets statistics about the embedded nodes by type.
     * @param {NodeWithEmbedding[]} nodes - Array of nodes with embeddings.
     * @returns {Record<string, number>} Node type statistics.
     */
    private getNodeTypeStats;
    /**
     * Generates an embedding for a search query.
     * @param {string} query - The search query text.
     * @returns {Promise<number[]>} The generated embedding vector.
     */
    generateQueryEmbedding(query: string): Promise<number[]>;
    /**
     * Generates an embedding for document content.
     * @param {string} text - The document text.
     * @returns {Promise<number[]>} The generated embedding vector.
     */
    generateDocumentEmbedding(text: string): Promise<number[]>;
    /**
     * Gets embedding service statistics.
     * @returns {Promise<{modelLoaded: boolean, model: string, dimensions: number}>}
     *   Resolves to an object with a `modelLoaded` flag, a `model` string, and a
     *   numeric `dimensions` field.
     */
    getStats(): Promise<{
        modelLoaded: boolean;
        model: string;
        dimensions: number;
    }>;
}
//# sourceMappingURL=embedding-service.d.ts.map