// neuradb — Lightweight In-Memory Vector Database for Fast Similarity Search
// Type declaration file (original: 335 lines, 11 kB, TypeScript).
// NOTE(review): the lines above were non-comment page residue from an HTML
// extraction and would not parse; preserved here as a comment header.
import { VectorDocument, SearchResult, SimilarityMethod, SearchOptions, VectorStoreStats } from "./interfaces/vector-store.interface";
/**
 * Minimal shape of the response from OpenAI's embeddings endpoint, as
 * consumed by NeuraDB. Mirrors the relevant subset of the official SDK's
 * response type so any structurally compatible client can be injected.
 */
interface OpenAIEmbeddingResponse {
  /** One entry per input text; `index` maps an embedding back to its input position. */
  data: Array<{
    embedding: number[];
    index: number;
  }>;
  /** Name of the model that produced the embeddings. */
  model: string;
  /** Token accounting reported by the API for this request. */
  usage: {
    prompt_tokens: number;
    total_tokens: number;
  };
}
/**
 * Structural stand-in for an OpenAI SDK client. Only the
 * `embeddings.create` call is required, which keeps the `openai` package an
 * optional dependency — any object matching this shape works.
 */
interface OpenAIInstance {
  embeddings: {
    /** Creates embeddings for one text or a batch of texts with the given model. */
    create: (params: {
      input: string | string[];
      model: string;
    }) => Promise<OpenAIEmbeddingResponse>;
  };
}
/** Constructor options for {@link NeuraDB}. All fields are optional. */
interface NeuraDBOptions {
  /** OpenAI-compatible client; required only for automatic embedding generation. */
  openai?: OpenAIInstance;
  /** Embedding model name passed to the OpenAI client. */
  embeddingModel?: string;
  /** Default number of texts sent per embedding request batch. */
  defaultBatchSize?: number;
  /** Default pause between embedding batches, in milliseconds (rate-limit friendliness). */
  batchDelay?: number;
}
/**
 * A {@link VectorDocument} whose `embedding` may be omitted — used by the
 * add/update APIs, which can generate the embedding on the caller's behalf
 * when `createEmbedding: true` is passed.
 */
interface DocumentWithOptionalEmbedding extends Omit<VectorDocument, 'embedding'> {
  embedding?: number[];
}
/** Options for bulk insertion via {@link NeuraDB.addDocuments}. */
interface AddDocumentsOptions {
  /** When true, generate embeddings for documents that lack one (requires an OpenAI client). */
  createEmbedding?: boolean;
  /** Texts per embedding request; overrides the store's default batch size. */
  batchSize?: number;
  /** Milliseconds to wait between batches; overrides the store's default delay. */
  batchDelay?: number;
  /** Invoked after each batch with counts of processed vs. total documents. */
  onProgress?: (processed: number, total: number) => void;
}
/**
 * VectorStore provides high-performance in-memory vector similarity search
 * with support for multiple similarity methods, document management, and automatic OpenAI embeddings.
 *
 * Features:
 * - Zero dependencies (OpenAI optional)
 * - Multiple similarity methods (cosine, euclidean, dot product)
 * - Automatic embedding generation with OpenAI
 * - Metadata filtering
 * - TypeScript support
 * - Memory-efficient storage
 *
 * @example
 * ```typescript
 * import { NeuraDB } from 'neuradb';
 * import OpenAI from 'openai';
 *
 * const openai = new OpenAI({ apiKey: 'your-api-key' });
 * const store = new NeuraDB({ openai });
 *
 * // Add documents with automatic embedding generation
 * await store.addDocument({
 *   id: 'doc1',
 *   content: 'Hello world',
 *   metadata: { category: 'greeting' }
 * }, { createEmbedding: true });
 *
 * // Search with automatic query embedding
 * const results = await store.search('Hello there', {
 *   limit: 5,
 *   threshold: 0.7,
 *   similarityMethod: 'cosine'
 * });
 * ```
 */
export declare class NeuraDB {
  /** Backing store for all added documents (concrete container lives in the implementation). */
  private documents;
  /** Optional OpenAI-compatible client; undefined when embeddings are supplied by the caller. */
  private openai?;
  /** Model name used for all embedding requests (see get/setEmbeddingModel). */
  private embeddingModel;
  /** Default texts-per-request for batched embedding calls. */
  private defaultBatchSize;
  /** Default pause between embedding batches, in milliseconds. */
  private batchDelay;
  constructor(options?: NeuraDBOptions);
  /**
   * Generate embedding using OpenAI
   * @param text Text to embed
   * @returns Embedding vector
   * @throws Error if OpenAI instance is not provided
   */
  generateEmbedding(text: string): Promise<number[]>;
  /**
   * Generate embeddings for multiple texts using OpenAI with batch processing
   * @param texts Array of texts to embed
   * @param batchSize Number of texts to process in each batch (defaults to the store's defaultBatchSize)
   * @param batchDelay Delay between batches in milliseconds (defaults to the store's batchDelay)
   * @returns Array of embedding vectors, in the same order as `texts`
   * @throws Error if OpenAI instance is not provided
   */
  generateEmbeddings(texts: string[], batchSize?: number, batchDelay?: number): Promise<number[][]>;
  /**
   * Add a single document with pre-computed embedding or generate embedding automatically
   * @param document The document to add (embedding optional if createEmbedding is true)
   * @param options Options for document addition
   * @throws Error if document doesn't have a valid embedding or dimensions don't match
   * @example
   * ```typescript
   * // With pre-computed embedding
   * store.addDocument({
   *   id: 'doc1',
   *   content: 'Sample text',
   *   embedding: [0.1, 0.2, 0.3],
   *   metadata: { type: 'article' }
   * });
   *
   * // With automatic embedding generation
   * await store.addDocument({
   *   id: 'doc2',
   *   content: 'Another text',
   *   metadata: { type: 'article' }
   * }, { createEmbedding: true });
   * ```
   */
  addDocument(document: DocumentWithOptionalEmbedding, options?: {
    createEmbedding?: boolean;
  }): Promise<void>;
  /**
   * Add multiple documents with pre-computed embeddings or generate embeddings automatically
   * @param documents Array of documents to add
   * @param options Options for document addition including batch processing
   * @throws Error if any document doesn't have a valid embedding
   * @example
   * ```typescript
   * // With automatic embedding generation and batch processing
   * await store.addDocuments([
   *   { id: '1', content: 'Text 1' },
   *   { id: '2', content: 'Text 2' }
   * ], {
   *   createEmbedding: true,
   *   batchSize: 50,
   *   batchDelay: 500,
   *   onProgress: (processed, total) => console.log(`${processed}/${total}`)
   * });
   * ```
   */
  addDocuments(documents: DocumentWithOptionalEmbedding[], options?: AddDocumentsOptions): Promise<void>;
  /**
   * Search for similar documents using vector similarity
   * @param query The query (can be embedding vector or text string; a string
   *   requires a configured OpenAI client for automatic embedding)
   * @param options Search configuration options
   * @returns Array of search results sorted by similarity (highest first)
   * @throws Error if query is invalid
   * @example
   * ```typescript
   * // Search with pre-computed embedding
   * const results = store.search([0.1, 0.2, 0.3], {
   *   limit: 10,
   *   threshold: 0.5,
   *   similarityMethod: 'cosine',
   *   metadataFilter: { category: 'news' }
   * });
   *
   * // Search with text query (automatic embedding)
   * const results = await store.search('Hello world', {
   *   limit: 10,
   *   threshold: 0.5,
   *   similarityMethod: 'cosine'
   * });
   * ```
   */
  search(query: number[] | string, options?: SearchOptions): Promise<SearchResult[]>;
  /**
   * Find the most similar document to the query
   * @param query The query (can be embedding vector or text string)
   * @param similarityMethod Similarity calculation method
   * @returns Most similar document or null if none found
   * @example
   * ```typescript
   * const mostSimilar = await store.findMostSimilar('Hello world', 'cosine');
   * if (mostSimilar) {
   *   console.log(`Most similar: ${mostSimilar.document.content}`);
   * }
   * ```
   */
  findMostSimilar(query: number[] | string, similarityMethod?: SimilarityMethod): Promise<SearchResult | null>;
  /**
   * Get document by ID
   * @param id Document ID
   * @returns Document or undefined if not found
   */
  getDocument(id: string): VectorDocument | undefined;
  /**
   * Check if a document exists
   * @param id Document ID
   * @returns True if document exists
   */
  hasDocument(id: string): boolean;
  /**
   * Remove document by ID
   * @param id Document ID to remove
   * @returns True if document was removed, false if not found
   */
  removeDocument(id: string): boolean;
  /**
   * Update an existing document
   * @param document Updated document (embedding optional if createEmbedding is true)
   * @param options Options for document update
   * @returns True if document was updated, false if not found
   * @throws Error if document doesn't have a valid embedding
   */
  updateDocument(document: DocumentWithOptionalEmbedding, options?: {
    createEmbedding?: boolean;
  }): Promise<boolean>;
  /**
   * Get all documents
   * @returns Array of all documents
   */
  getAllDocuments(): VectorDocument[];
  /**
   * Get documents by metadata filter
   * @param filter Metadata filter criteria
   * @returns Array of matching documents
   * @example
   * ```typescript
   * const newsArticles = store.getDocumentsByMetadata({ category: 'news' });
   * ```
   */
  getDocumentsByMetadata(filter: Record<string, any>): VectorDocument[];
  /**
   * Clear all documents from the store
   */
  clear(): void;
  /**
   * Get the number of documents in the store
   * @returns Number of documents stored
   */
  size(): number;
  /**
   * Check if the store is empty
   * @returns True if no documents are stored
   */
  isEmpty(): boolean;
  /**
   * Get embedding dimensions from stored documents
   * @returns Number of dimensions or null if no documents
   */
  getEmbeddingDimensions(): number | null;
  /**
   * Get comprehensive statistics about the vector store
   * @returns Statistics including document count, dimensions, and memory usage
   */
  getStats(): VectorStoreStats;
  /**
   * Check if OpenAI instance is available
   * @returns True if OpenAI instance is configured
   */
  hasOpenAI(): boolean;
  /**
   * Get current embedding model name
   * @returns Embedding model name
   */
  getEmbeddingModel(): string;
  /**
   * Set embedding model name
   * @param model New embedding model name
   */
  setEmbeddingModel(model: string): void;
  /**
   * Get default batch size for embedding operations
   * @returns Default batch size
   */
  getDefaultBatchSize(): number;
  /**
   * Set default batch size for embedding operations
   * @param batchSize New default batch size
   */
  setDefaultBatchSize(batchSize: number): void;
  /**
   * Get default batch delay
   * @returns Default batch delay in milliseconds
   */
  getDefaultBatchDelay(): number;
  /**
   * Set default batch delay
   * @param delay New default batch delay in milliseconds
   */
  setDefaultBatchDelay(delay: number): void;
  /**
   * Split array into chunks of specified size
   */
  private chunkArray;
  /**
   * Delay execution for specified milliseconds
   */
  private delay;
  /**
   * Validate document structure and embedding
   */
  private validateDocument;
  /**
   * Validate embedding dimensions against existing documents
   */
  private validateEmbeddingDimensions;
  /**
   * Filter documents by metadata criteria
   */
  private filterByMetadata;
  /**
   * Calculate similarity between two vectors using specified method
   */
  private calculateSimilarity;
  /**
   * Calculate cosine similarity between two vectors
   * @returns Similarity score between -1 and 1 (1 being identical)
   */
  private cosineSimilarity;
  /**
   * Calculate Euclidean similarity between two vectors
   * @returns Similarity score between 0 and 1 (1 being identical)
   */
  private euclideanSimilarity;
  /**
   * Calculate dot product similarity between two vectors
   * @returns Dot product value
   */
  private dotProductSimilarity;
  /**
   * Search for similar documents with pagination metadata
   * @param query The query (can be embedding vector or text string)
   * @param options Search configuration options (must include pageSize;
   *   see {@link SearchOptions} — NOTE(review): the declared type does not
   *   enforce this, so it is a runtime contract)
   * @returns Object with data (results), page, pageSize, totalResults, totalPages
   */
  searchWithPagination(query: number[] | string, options?: SearchOptions): Promise<{
    data: SearchResult[];
    page: number;
    pageSize: number;
    totalResults: number;
    totalPages: number;
  }>;
}
export {};
//# sourceMappingURL=vector-store.d.ts.map