@forge-ml/rag
Version:
A RAG (Retrieval-Augmented Generation) package for Forge ML
43 lines (42 loc) • 1.96 kB
JavaScript
import { ChunkingStrategy, } from "../types";
import chunkText from "../simple/split";
/**
 * Builds a "ragger": an object that bundles an embedder with a vector store
 * and a document store, and exposes helpers to index a document and to query
 * the indexed data.
 *
 * @param {object} embedder - Must provide `generateEmbedding(query)` and
 *   `embedChunks(chunks, documentId)`.
 * @param {object} stores - Holder for the two stores.
 * @param {object} stores.vectorStore - Must provide
 *   `queryEmbeddings({ query, k, documentIds })` and `storeEmbeddings(embeddings)`.
 * @param {object} stores.docStore - Must provide
 *   `mergeChunksAndEmbeddings(embeddings, documentIds)` and
 *   `storeDocument(document, chunks)`.
 * @returns {object} `{ embedder, vectorStore, docStore, query, initializeDocument }`.
 */
const createRagger = (embedder, stores) => {
  const { vectorStore, docStore } = stores;

  return {
    embedder,
    vectorStore,
    docStore,

    //@QUESTION we only use the document id; does it make sense to pass in the document?
    // If we pass in the doc id the call looks like:
    //const relevantChunks = await ragger.query(query, document.getForgeMetadata().documentId, 5);
    /**
     * Embeds the query, looks up the `k` closest embeddings in the vector
     * store, and asks the document store to merge them with their chunks.
     *
     * @param {*} query - Passed as-is to `embedder.generateEmbedding`.
     * @param {*} documentIds - Forwarded unchanged to both stores
     *   (presumably one id or a list of ids; confirm against the store implementations).
     * @param {number} [k=3] - Number of embeddings requested from the vector store.
     * @returns {Promise<*>} Whatever `docStore.mergeChunksAndEmbeddings` resolves to.
     */
    query: async (query, documentIds, k = 3) => {
      const queryVector = await embedder.generateEmbedding(query);
      const embeddings = await vectorStore.queryEmbeddings({
        query: queryVector,
        k,
        documentIds,
      });
      // Get the chunks
      return docStore.mergeChunksAndEmbeddings(embeddings, documentIds);
    },

    /**
     * Chunks a document, embeds the chunks, and persists both the embeddings
     * and the document with its chunks.
     *
     * @param {object} document - Must provide `getForgeMetadata()` returning an
     *   object with a `documentId`; also handed to `chunkText` and
     *   `docStore.storeDocument`.
     * @param {object} [options] - Chunking options.
     * @param {*} [options.strategy] - Chunking strategy; falls back to
     *   `ChunkingStrategy.BY_WORD_COUNT` only when `null`/`undefined`.
     * @param {*} [options.delimiter] - Forwarded to `chunkText`.
     * @param {*} [options.wordCount] - Forwarded to `chunkText`.
     * @returns {Promise<*>} The chunks produced by `chunkText`.
     */
    initializeDocument: async (document, options) => {
      // chunk the document
      const chunks = chunkText(document, {
        // `??` rather than `||`: an explicitly supplied falsy strategy (e.g. a
        // numeric enum member equal to 0) must not be replaced by the default.
        strategy: options?.strategy ?? ChunkingStrategy.BY_WORD_COUNT,
        delimiter: options?.delimiter,
        wordCount: options?.wordCount,
      });
      // embed the chunks
      const embeddings = await embedder.embedChunks(
        chunks,
        document.getForgeMetadata().documentId,
      );
      // store the embeddings in a vector store
      const embeddingPromise = vectorStore.storeEmbeddings(embeddings);
      //@QUESTION: in minio should documents and chunks be in the same folder or have their own folder in minio
      const docStorePromise = docStore.storeDocument(document, chunks);
      // Both writes are started before awaiting so they run concurrently;
      // Promise.all rejects as soon as either write rejects.
      await Promise.all([embeddingPromise, docStorePromise]);
      return chunks;
    },
  };
};
export default createRagger;