UNPKG

@forge-ml/rag

Version:

A RAG (Retrieval-Augmented Generation) package for Forge ML

43 lines (42 loc) 1.96 kB
import { ChunkingStrategy } from "../types";
import chunkText from "../simple/split";

/**
 * Builds a "ragger": an object bundling an embedder, a vector store and a
 * document store together with two async operations that use them.
 *
 * @param embedder - Must expose `generateEmbedding(query)` and
 *   `embedChunks(chunks, documentId)`.
 * @param stores - Object with `vectorStore` and `docStore` properties.
 *   `vectorStore` must expose `queryEmbeddings` / `storeEmbeddings`;
 *   `docStore` must expose `mergeChunksAndEmbeddings` / `storeDocument`.
 * @returns An object `{ embedder, vectorStore, docStore, query, initializeDocument }`.
 */
const createRagger = (embedder, stores) => {
  const { vectorStore, docStore } = stores;

  return {
    embedder,
    vectorStore,
    docStore,

    /**
     * Embeds the query text, asks the vector store for the `k` best matching
     * embeddings restricted to `documentIds`, then has the doc store merge
     * those embeddings with their chunks.
     *
     * @QUESTION Only the document id is used here; does it make sense to pass
     * in the whole document? When passing the id, the call looks like:
     *   const relevantChunks = await ragger.query(query, document.getForgeMetadata().documentId, 5);
     *
     * @param query - Value handed to `embedder.generateEmbedding`.
     * @param documentIds - Forwarded unchanged to both stores.
     * @param k - Number of embeddings requested from the vector store (default 3).
     * @returns Whatever `docStore.mergeChunksAndEmbeddings` resolves to.
     */
    query: async (query, documentIds, k = 3) => {
      const queryVector = await embedder.generateEmbedding(query);
      const matches = await vectorStore.queryEmbeddings({
        query: queryVector,
        k,
        documentIds,
      });

      // Resolve the matched embeddings back to their chunks.
      return await docStore.mergeChunksAndEmbeddings(matches, documentIds);
    },

    /**
     * Chunks a document, embeds the chunks, and persists both the embeddings
     * (vector store) and the document with its chunks (doc store).
     *
     * @param document - Passed to `chunkText` and `docStore.storeDocument`;
     *   must expose `getForgeMetadata()` returning an object with `documentId`.
     * @param options - Optional `{ strategy, delimiter, wordCount }`. A falsy
     *   `strategy` falls back to `ChunkingStrategy.BY_WORD_COUNT`.
     * @returns The chunks produced by `chunkText`.
     */
    initializeDocument: async (document, options) => {
      const { strategy, delimiter, wordCount } = options ?? {};

      // Step 1: split the document into chunks.
      const chunks = chunkText(document, {
        strategy: strategy || ChunkingStrategy.BY_WORD_COUNT,
        delimiter,
        wordCount,
      });

      // Step 2: embed the chunks, tagged with the owning document's id.
      const { documentId } = document.getForgeMetadata();
      const embeddings = await embedder.embedChunks(chunks, documentId);

      // Step 3: both writes are started before either is awaited.
      // @QUESTION: in minio, should documents and chunks be in the same folder
      // or have their own folder?
      await Promise.all([
        vectorStore.storeEmbeddings(embeddings),
        docStore.storeDocument(document, chunks),
      ]);

      return chunks;
    },
  };
};

export default createRagger;