UNPKG

askexperts

Version:

AskExperts SDK: build and use AI experts - ask them questions and pay with bitcoin on an open protocol

128 lines 4.92 kB
import { debugDocstore } from "../common/debug.js";
/**
 * Class to sync documents from a DocStore to a RagDB.
 *
 * Each document's embeddings are turned into individual RAG entries and
 * written to the RagDB in batches while a docstore subscription is active.
 */
export class DocstoreToRag {
    /**
     * Creates a new DocstoreToRag instance
     * @param ragDB - The RagDB instance to store embeddings in (its `storeBatch` method is used)
     * @param docStoreClient - The DocStoreClient to get documents from (its `subscribe` method is used)
     */
    constructor(ragDB, docStoreClient) {
        // Subscriptions opened by sync() that have not been stopped yet
        this.activeSubscriptions = new Set();
        this.ragDB = ragDB;
        this.docStoreClient = docStoreClient;
    }
    /**
     * Converts a Doc to an array of RagDocuments (one per embedding)
     * @param doc - The document to convert
     * @param onDocMeta - Optional callback `(doc, vector)` whose (awaited) result
     *   replaces the default metadata of each entry
     * @returns Array of RagDocuments with vectors and metadata; empty when the
     *   document has no embeddings or when the conversion throws
     */
    async docToRagDocuments(doc, onDocMeta) {
        // Skip documents without embeddings
        if (!doc.embeddings || doc.embeddings.length === 0) {
            return [];
        }
        try {
            // Convert every embedding to a regular array up front, so a malformed
            // embedding aborts the document before any metadata callback runs
            const vectors = doc.embeddings.map((embedding) => Array.from(embedding));
            // Create a RagDocument for each embedding vector
            const ragDocuments = [];
            for (let i = 0; i < vectors.length; i++) {
                const vector = vectors[i];
                // Custom metadata fully replaces the default (which excludes data and embeddings)
                const metadata = onDocMeta
                    ? await onDocMeta(doc, vector)
                    : {
                        id: doc.id,
                        docstore_id: doc.docstore_id,
                        type: doc.type,
                        timestamp: doc.timestamp,
                        created_at: doc.created_at,
                        chunk: i,
                    };
                ragDocuments.push({
                    id: `${doc.docstore_id}:${doc.id}:${i}`,
                    vector,
                    metadata,
                });
            }
            return ragDocuments;
        }
        catch (error) {
            // Best effort: a document that cannot be converted is logged and skipped
            console.error(`Error parsing embeddings for document ${doc.id}:`, error);
            return [];
        }
    }
    /**
     * Syncs documents from a docstore to the RagDB
     * @param options - Options for syncing: `docstore_id` and `type` are passed to
     *   the subscription, `collection_name` is the RagDB collection written to,
     *   `onDoc` may filter documents (a falsy result skips the document),
     *   `onEof` is called after the end-of-stream flush, and `onDocMeta`
     *   customizes the metadata of each entry
     * @returns Subscription with a stop method
     */
    async sync(options) {
        const { docstore_id, collection_name, type, onDoc, onEof, onDocMeta } = options;
        const BATCH_SIZE = 100;
        // Batch of documents to store
        let batch = [];
        // Writes the current batch to the RagDB. The batch is detached BEFORE the
        // await so that entries appended by another callback invocation while the
        // write is in flight land in a fresh array instead of being discarded by a
        // late reset.
        const flushBatch = async (...debugArgs) => {
            if (batch.length === 0) {
                return;
            }
            const pending = batch;
            batch = [];
            try {
                await this.ragDB.storeBatch(collection_name, pending);
            }
            catch (error) {
                // Keep the unsaved entries so the next flush retries them
                batch = pending.concat(batch);
                throw error;
            }
            debugDocstore("Synced batch", pending.length, ...debugArgs);
        };
        // Subscribe to the docstore
        const subscription = await this.docStoreClient.subscribe({ docstore_id, type }, async (doc) => {
            // If doc is undefined, it signals EOF
            if (!doc) {
                // Store any remaining documents in the batch
                await flushBatch();
                // Call the onEof callback if provided
                if (onEof) {
                    onEof();
                }
                return;
            }
            // onDoc callback might filter this doc
            if (onDoc && !(await onDoc(doc))) {
                return;
            }
            // Convert the document to multiple RagDocuments (one per embedding)
            const ragDocuments = await this.docToRagDocuments(doc, onDocMeta);
            // Add the documents to the batch
            for (const ragDocument of ragDocuments) {
                batch.push(ragDocument);
                // If the batch is full, store it and start a new one
                if (batch.length >= BATCH_SIZE) {
                    await flushBatch(BATCH_SIZE);
                }
            }
        });
        // Store the subscription
        this.activeSubscriptions.add(subscription);
        // Return an object with a stop method
        return {
            stop: () => {
                subscription.close();
                this.activeSubscriptions.delete(subscription);
            },
        };
    }
    /**
     * Closes all active subscriptions when the object is disposed
     */
    [Symbol.dispose]() {
        // Close all active subscriptions
        for (const subscription of this.activeSubscriptions.values()) {
            subscription.close();
        }
        // Clear the subscriptions set
        this.activeSubscriptions.clear();
    }
}
//# sourceMappingURL=DocstoreToRag.js.map