hikma-engine
Version:
Code Knowledge Graph Indexer - A sophisticated TypeScript-based indexer that transforms Git repositories into multi-dimensional knowledge stores for AI agents
133 lines • 5.46 kB
TypeScript
/**
* @file Responsible for generating vector embeddings for various node types.
*/
import { BaseNode, NodeWithEmbedding } from '../types';
import { ConfigManager } from '../config';
/**
* Type declaration for the embedding service: generates vector embeddings for
* nodes and for query/document text, and exposes similarity helpers over
* embedded nodes.
*
* This is an ambient (`declare`) class: only signatures are visible here.
* Private members appear without types because none are written in this
* declaration; their real types live in the implementation.
*/
export declare class EmbeddingService {
/**
* Service configuration.
* NOTE(review): presumably the ConfigManager passed to the constructor — confirm against the implementation.
*/
private config;
/**
* Logger used by the service.
* NOTE(review): concrete logger type is not visible in this declaration.
*/
private logger;
/**
* Handle to the embedding model.
* NOTE(review): presumably populated by loadModel() — confirm against the implementation.
*/
private model;
/**
* Initializes the Embedding Service.
* @param {ConfigManager} config - Configuration manager instance.
*/
constructor(config: ConfigManager);
/**
* Loads the pre-trained embedding model.
* @returns {Promise<void>} Promise with no resolved value.
*/
loadModel(): Promise<void>;
/**
* Extracts meaningful text content from a node for embedding generation.
* @param {BaseNode} node - The node to extract text from.
* @returns {string} The extracted text content.
*/
private getTextForNode;
/**
* Generates a vector embedding for a given text.
* @param {string} text - The text to embed.
* @param {boolean} isQuery - Whether this text is a search query (requires special prompt for some models).
* @returns {Promise<number[]>} The generated embedding vector.
*/
private generateEmbedding;
/**
* Undocumented private helper.
* NOTE(review): the name suggests a simple hash function; its parameters,
* return type and callers are not visible in this declaration — confirm
* against the implementation before relying on it.
*/
private simpleHash;
/**
* Normalizes user-provided OpenAI-compatible API URL to a base URL.
* Accepts inputs like base or full paths and returns a clean base such as http://localhost:11434
*/
private normalizeOpenAIBaseUrl;
/**
* Tests connection to LM Studio server.
* NOTE(review): return type and failure behavior are not visible in this declaration.
*/
private testLMStudioConnection;
/**
* Generates embedding using transformers.js pipeline.
* @param {string} text - The text to generate an embedding for.
* @param {boolean} isQuery - Whether this text is a search query.
* @returns {Promise<number[]>} The generated embedding vector.
*/
private generateTransformersEmbedding;
/**
* Generates embedding using Python script.
* @param {string} text - The text to generate an embedding for.
* @param {boolean} isQuery - Whether this text is a search query.
* @returns {Promise<number[]>} The generated embedding vector.
*/
private generatePythonEmbedding;
/**
* Generates embedding using LM Studio server.
* Unlike the other provider-specific generators documented in this class,
* no `isQuery` parameter is documented for this one.
* @param {string} text - The text to generate an embedding for.
* @returns {Promise<number[]>} The generated embedding vector.
*/
private generateLMStudioEmbedding;
/**
* Generates embedding using OpenAI-compatible API (like Ollama).
* @param {string} text - The text to generate an embedding for.
* @param {boolean} isQuery - Whether this is a query embedding.
* @returns {Promise<number[]>} The generated embedding vector.
*/
private generateOpenAIEmbedding;
/**
* Processes nodes in batches to generate embeddings efficiently.
* NOTE(review): batch size and its source are not visible in this declaration.
* @param {BaseNode[]} nodes - Array of nodes to embed.
* @returns {Promise<NodeWithEmbedding[]>} Array of nodes with embeddings.
*/
private processBatch;
/**
* Generates embeddings for all provided nodes.
* @param {BaseNode[]} nodes - Array of nodes to generate embeddings for.
* @returns {Promise<NodeWithEmbedding[]>} Array of nodes with embeddings attached.
*/
embedNodes(nodes: BaseNode[]): Promise<NodeWithEmbedding[]>;
/**
* Generates an embedding for a single text query (useful for search).
* NOTE(review): documented purpose overlaps with generateQueryEmbedding(),
* which has the same signature; whether one delegates to the other cannot
* be determined from this declaration.
* @param {string} query - The query text to embed.
* @returns {Promise<number[]>} The embedding vector for the query.
*/
embedQuery(query: string): Promise<number[]>;
/**
* Calculates cosine similarity between two embedding vectors.
* NOTE(review): handling of vectors with different lengths or zero magnitude
* is not visible in this declaration.
* @param {number[]} embedding1 - First embedding vector.
* @param {number[]} embedding2 - Second embedding vector.
* @returns {number} Cosine similarity score between -1 and 1.
*/
calculateSimilarity(embedding1: number[], embedding2: number[]): number;
/**
* Finds the most similar nodes to a query embedding.
* @param {number[]} queryEmbedding - The query embedding vector.
* @param {NodeWithEmbedding[]} nodes - Array of nodes with embeddings.
* @param {number} topK - Number of top similar nodes to return. Optional; the
* default applied when omitted is not visible in this declaration.
* @returns {Array<{node: NodeWithEmbedding, similarity: number}>} Top similar nodes with similarity scores.
*/
findSimilarNodes(queryEmbedding: number[], nodes: NodeWithEmbedding[], topK?: number): Array<{
node: NodeWithEmbedding;
similarity: number;
}>;
/**
* Gets statistics about the embedded nodes by type.
* @param {NodeWithEmbedding[]} nodes - Array of nodes with embeddings.
* @returns {Record<string, number>} Node type statistics.
*/
private getNodeTypeStats;
/**
* Generates an embedding for a search query.
* NOTE(review): same signature and documented purpose as embedQuery();
* confirm which one callers are expected to use.
* @param {string} query - The search query text.
* @returns {Promise<number[]>} The generated embedding vector.
*/
generateQueryEmbedding(query: string): Promise<number[]>;
/**
* Generates an embedding for document content.
* @param {string} text - The document text.
* @returns {Promise<number[]>} The generated embedding vector.
*/
generateDocumentEmbedding(text: string): Promise<number[]>;
/**
* Gets embedding service statistics.
* The resolved object has three fields: `modelLoaded` (boolean), `model`
* (string) and `dimensions` (number).
* NOTE(review): presumably `model` is the model identifier and `dimensions`
* the embedding vector length — confirm against the implementation.
* @returns {Promise<{modelLoaded: boolean, model: string, dimensions: number}>}
*/
getStats(): Promise<{
modelLoaded: boolean;
model: string;
dimensions: number;
}>;
}
//# sourceMappingURL=embedding-service.d.ts.map