UNPKG

neuradb

Version:

Lightweight In-Memory Vector Database for Fast Similarity Search

335 lines 11 kB
import { VectorDocument, SearchResult, SimilarityMethod, SearchOptions, VectorStoreStats } from "./interfaces/vector-store.interface";

/** Shape of the response returned by the OpenAI embeddings endpoint. */
interface OpenAIEmbeddingResponse {
    data: Array<{
        embedding: number[];
        index: number;
    }>;
    model: string;
    usage: {
        prompt_tokens: number;
        total_tokens: number;
    };
}

/**
 * Minimal structural contract for an injected OpenAI client.
 * Only the `embeddings.create` call is required, so any compatible
 * client (or a test double) can be supplied.
 */
interface OpenAIInstance {
    embeddings: {
        create: (params: {
            input: string | string[];
            model: string;
        }) => Promise<OpenAIEmbeddingResponse>;
    };
}

/** Constructor options for {@link NeuraDB}. All fields are optional. */
interface NeuraDBOptions {
    /** OpenAI-compatible client; required only for automatic embedding generation. */
    openai?: OpenAIInstance;
    /** Embedding model name passed to the OpenAI API. */
    embeddingModel?: string;
    /** Default number of texts embedded per batch. */
    defaultBatchSize?: number;
    /** Default delay between embedding batches, in milliseconds. */
    batchDelay?: number;
}

/** A document whose embedding may be omitted when it will be generated automatically. */
interface DocumentWithOptionalEmbedding extends Omit<VectorDocument, 'embedding'> {
    embedding?: number[];
}

/** Options for bulk document insertion. */
interface AddDocumentsOptions {
    /** Generate embeddings via OpenAI for documents that lack one. */
    createEmbedding?: boolean;
    /** Number of texts embedded per batch (overrides the store default). */
    batchSize?: number;
    /** Delay between batches in milliseconds (overrides the store default). */
    batchDelay?: number;
    /** Progress callback invoked after each batch completes. */
    onProgress?: (processed: number, total: number) => void;
}

/**
 * VectorStore provides high-performance in-memory vector similarity search
 * with support for multiple similarity methods, document management, and
 * automatic OpenAI embeddings.
 *
 * Features:
 * - Zero dependencies (OpenAI optional)
 * - Multiple similarity methods (cosine, euclidean, dot product)
 * - Automatic embedding generation with OpenAI
 * - Metadata filtering
 * - TypeScript support
 * - Memory-efficient storage
 *
 * @example
 * ```typescript
 * import { NeuraDB } from 'vector-similarity-search';
 * import OpenAI from 'openai';
 *
 * const openai = new OpenAI({ apiKey: 'your-api-key' });
 * const store = new NeuraDB({ openai });
 *
 * // Add documents with automatic embedding generation
 * await store.addDocument({
 *   id: 'doc1',
 *   content: 'Hello world',
 *   metadata: { category: 'greeting' }
 * }, { createEmbedding: true });
 *
 * // Search with automatic query embedding
 * const results = await store.search('Hello there', {
 *   limit: 5,
 *   threshold: 0.7,
 *   similarityMethod: 'cosine'
 * });
 * ```
 */
export declare class NeuraDB {
    private documents;
    private openai?;
    private embeddingModel;
    private defaultBatchSize;
    private batchDelay;
    constructor(options?: NeuraDBOptions);
    /**
     * Generate embedding using OpenAI
     * @param text Text to embed
     * @returns Embedding vector
     * @throws Error if OpenAI instance is not provided
     */
    generateEmbedding(text: string): Promise<number[]>;
    /**
     * Generate embeddings for multiple texts using OpenAI with batch processing
     * @param texts Array of texts to embed
     * @param batchSize Number of texts to process in each batch
     * @param batchDelay Delay between batches in milliseconds
     * @returns Array of embedding vectors
     * @throws Error if OpenAI instance is not provided
     */
    generateEmbeddings(texts: string[], batchSize?: number, batchDelay?: number): Promise<number[][]>;
    /**
     * Add a single document with pre-computed embedding or generate embedding automatically
     * @param document The document to add (embedding optional if createEmbedding is true)
     * @param options Options for document addition
     * @throws Error if document doesn't have a valid embedding or dimensions don't match
     * @example
     * ```typescript
     * // With pre-computed embedding
     * await store.addDocument({
     *   id: 'doc1',
     *   content: 'Sample text',
     *   embedding: [0.1, 0.2, 0.3],
     *   metadata: { type: 'article' }
     * });
     *
     * // With automatic embedding generation
     * await store.addDocument({
     *   id: 'doc2',
     *   content: 'Another text',
     *   metadata: { type: 'article' }
     * }, { createEmbedding: true });
     * ```
     */
    addDocument(document: DocumentWithOptionalEmbedding, options?: {
        createEmbedding?: boolean;
    }): Promise<void>;
    /**
     * Add multiple documents with pre-computed embeddings or generate embeddings automatically
     * @param documents Array of documents to add
     * @param options Options for document addition including batch processing
     * @throws Error if any document doesn't have a valid embedding
     * @example
     * ```typescript
     * // With automatic embedding generation and batch processing
     * await store.addDocuments([
     *   { id: '1', content: 'Text 1' },
     *   { id: '2', content: 'Text 2' }
     * ], {
     *   createEmbedding: true,
     *   batchSize: 50,
     *   batchDelay: 500,
     *   onProgress: (processed, total) => console.log(`${processed}/${total}`)
     * });
     * ```
     */
    addDocuments(documents: DocumentWithOptionalEmbedding[], options?: AddDocumentsOptions): Promise<void>;
    /**
     * Search for similar documents using vector similarity
     * @param query The query (can be embedding vector or text string)
     * @param options Search configuration options
     * @returns Array of search results sorted by similarity (highest first)
     * @throws Error if query is invalid
     * @example
     * ```typescript
     * // Search with pre-computed embedding
     * const results = await store.search([0.1, 0.2, 0.3], {
     *   limit: 10,
     *   threshold: 0.5,
     *   similarityMethod: 'cosine',
     *   metadataFilter: { category: 'news' }
     * });
     *
     * // Search with text query (automatic embedding)
     * const results = await store.search('Hello world', {
     *   limit: 10,
     *   threshold: 0.5,
     *   similarityMethod: 'cosine'
     * });
     * ```
     */
    search(query: number[] | string, options?: SearchOptions): Promise<SearchResult[]>;
    /**
     * Find the most similar document to the query
     * @param query The query (can be embedding vector or text string)
     * @param similarityMethod Similarity calculation method
     * @returns Most similar document or null if none found
     * @example
     * ```typescript
     * const mostSimilar = await store.findMostSimilar('Hello world', 'cosine');
     * if (mostSimilar) {
     *   console.log(`Most similar: ${mostSimilar.document.content}`);
     * }
     * ```
     */
    findMostSimilar(query: number[] | string, similarityMethod?: SimilarityMethod): Promise<SearchResult | null>;
    /**
     * Get document by ID
     * @param id Document ID
     * @returns Document or undefined if not found
     */
    getDocument(id: string): VectorDocument | undefined;
    /**
     * Check if a document exists
     * @param id Document ID
     * @returns True if document exists
     */
    hasDocument(id: string): boolean;
    /**
     * Remove document by ID
     * @param id Document ID to remove
     * @returns True if document was removed, false if not found
     */
    removeDocument(id: string): boolean;
    /**
     * Update an existing document
     * @param document Updated document (embedding optional if createEmbedding is true)
     * @param options Options for document update
     * @returns True if document was updated, false if not found
     * @throws Error if document doesn't have a valid embedding
     */
    updateDocument(document: DocumentWithOptionalEmbedding, options?: {
        createEmbedding?: boolean;
    }): Promise<boolean>;
    /**
     * Get all documents
     * @returns Array of all documents
     */
    getAllDocuments(): VectorDocument[];
    /**
     * Get documents by metadata filter
     * @param filter Metadata filter criteria
     * @returns Array of matching documents
     * @example
     * ```typescript
     * const newsArticles = store.getDocumentsByMetadata({ category: 'news' });
     * ```
     */
    getDocumentsByMetadata(filter: Record<string, any>): VectorDocument[];
    /**
     * Clear all documents from the store
     */
    clear(): void;
    /**
     * Get the number of documents in the store
     * @returns Number of documents stored
     */
    size(): number;
    /**
     * Check if the store is empty
     * @returns True if no documents are stored
     */
    isEmpty(): boolean;
    /**
     * Get embedding dimensions from stored documents
     * @returns Number of dimensions or null if no documents
     */
    getEmbeddingDimensions(): number | null;
    /**
     * Get comprehensive statistics about the vector store
     * @returns Statistics including document count, dimensions, and memory usage
     */
    getStats(): VectorStoreStats;
    /**
     * Check if OpenAI instance is available
     * @returns True if OpenAI instance is configured
     */
    hasOpenAI(): boolean;
    /**
     * Get current embedding model name
     * @returns Embedding model name
     */
    getEmbeddingModel(): string;
    /**
     * Set embedding model name
     * @param model New embedding model name
     */
    setEmbeddingModel(model: string): void;
    /**
     * Get default batch size for embedding operations
     * @returns Default batch size
     */
    getDefaultBatchSize(): number;
    /**
     * Set default batch size for embedding operations
     * @param batchSize New default batch size
     */
    setDefaultBatchSize(batchSize: number): void;
    /**
     * Get default batch delay
     * @returns Default batch delay in milliseconds
     */
    getDefaultBatchDelay(): number;
    /**
     * Set default batch delay
     * @param delay New default batch delay in milliseconds
     */
    setDefaultBatchDelay(delay: number): void;
    /**
     * Split array into chunks of specified size
     */
    private chunkArray;
    /**
     * Delay execution for specified milliseconds
     */
    private delay;
    /**
     * Validate document structure and embedding
     */
    private validateDocument;
    /**
     * Validate embedding dimensions against existing documents
     */
    private validateEmbeddingDimensions;
    /**
     * Filter documents by metadata criteria
     */
    private filterByMetadata;
    /**
     * Calculate similarity between two vectors using specified method
     */
    private calculateSimilarity;
    /**
     * Calculate cosine similarity between two vectors
     * @returns Similarity score between -1 and 1 (1 being identical)
     */
    private cosineSimilarity;
    /**
     * Calculate Euclidean similarity between two vectors
     * @returns Similarity score between 0 and 1 (1 being identical)
     */
    private euclideanSimilarity;
    /**
     * Calculate dot product similarity between two vectors
     * @returns Dot product value
     */
    private dotProductSimilarity;
    /**
     * Search for similar documents with pagination metadata
     * @param query The query (can be embedding vector or text string)
     * @param options Search configuration options (must include pageSize)
     * @returns Object with data (results), page, pageSize, totalResults, totalPages
     */
    searchWithPagination(query: number[] | string, options?: SearchOptions): Promise<{
        data: SearchResult[];
        page: number;
        pageSize: number;
        totalResults: number;
        totalPages: number;
    }>;
}
export {};
//# sourceMappingURL=vector-store.d.ts.map