/*
 * @forge-ml/rag
 * Version: (not captured in this listing)
 * A RAG (Retrieval-Augmented Generation) package for Forge ML
 * 141 lines (140 loc) • 5.78 kB
 * JavaScript
 */
;
Object.defineProperty(exports, "__esModule", { value: true });
const minio_1 = require("minio");
/**
 * Document store backed by a MinIO (S3-compatible) bucket.
 *
 * Each document is persisted as two JSON objects under a
 * `<documentId>/` prefix: the document itself (`DOCUMENT_FILE`) and its
 * chunks (`CHUNKS_FILE`).
 *
 * The constructor starts bucket creation in the background; every
 * operation waits for that initialization before touching the bucket, so
 * a call issued right after construction cannot race bucket creation.
 */
class MinioDocStore {
    /** MinIO client used for every bucket/object operation. */
    client;
    /** Bucket holding all objects; overridable through the constructor. */
    bucketName = "doc-store";
    // Not read anywhere in this class; kept so the public shape of
    // existing instances does not change.
    documentName = "doc";
    /** Object name (under the document prefix) of the serialized document. */
    static DOCUMENT_FILE = "documents";
    // Not read anywhere in this class; kept for the same reason as documentName.
    chunksName = "chunks";
    /** Object name (under the document prefix) of the serialized chunks. */
    static CHUNKS_FILE = "chunks";
    /** In-flight or completed bucket initialization; undefined when a retry is needed. */
    bucketReady;

    /**
     * @param {string} endpoint - MinIO host name.
     * @param {number} port - MinIO port.
     * @param {boolean} useSSL - Whether to connect over TLS.
     * @param {string} accessKey - Access key.
     * @param {string} secretKey - Secret key.
     * @param {string} [bucketName="doc-store"] - Bucket to store objects in.
     */
    constructor(endpoint, port, useSSL, accessKey, secretKey, bucketName = "doc-store") {
        this.client = new minio_1.Client({
            endPoint: endpoint,
            port,
            useSSL,
            accessKey,
            secretKey,
        });
        this.bucketName = bucketName;
        // Start bucket creation eagerly, as before, but observe a failure
        // here so it cannot surface as an unhandled promise rejection.
        // Operations re-attempt initialization via ensureBucketReady().
        this.ensureBucketReady().catch((error) => {
            console.error("Error initializing bucket:", error);
        });
    }

    /** Builds the object path of the serialized document for `documentId`. */
    static documentPathFor(documentId) {
        return `${documentId}/${MinioDocStore.DOCUMENT_FILE}`;
    }

    /** Builds the object path of the serialized chunks for `documentId`. */
    static chunksPathFor(documentId) {
        return `${documentId}/${MinioDocStore.CHUNKS_FILE}`;
    }

    /**
     * Creates the bucket when it does not exist yet.
     * @returns {Promise<void>}
     */
    async initializeBucket() {
        const bucketExists = await this.client.bucketExists(this.bucketName);
        if (!bucketExists) {
            await this.client.makeBucket(this.bucketName, "us-east-1");
        }
    }

    /**
     * Resolves once the bucket has been initialized. The initialization
     * promise is shared between callers; when it fails it is cleared so the
     * next caller retries instead of being stuck with a stale rejection.
     * @returns {Promise<void>}
     */
    ensureBucketReady() {
        if (!this.bucketReady) {
            this.bucketReady = this.initializeBucket().catch((error) => {
                this.bucketReady = undefined;
                throw error;
            });
        }
        return this.bucketReady;
    }

    /**
     * Reports whether an object exists in the bucket.
     *
     * Only a "not found" answer maps to `false`; any other failure
     * (network, credentials, ...) is rethrown rather than being mistaken
     * for a missing object.
     * NOTE(review): assumes the MinIO client reports a missing object with
     * error code "NotFound" (or "NoSuchKey") - confirm against the client docs.
     * @param {string} objectPath - Object name inside the bucket.
     * @returns {Promise<boolean>}
     */
    async objectExists(objectPath) {
        try {
            await this.client.statObject(this.bucketName, objectPath);
            return true;
        }
        catch (error) {
            if (error?.code === "NotFound" || error?.code === "NoSuchKey") {
                return false;
            }
            throw error;
        }
    }

    /**
     * Stores a document and its chunks as JSON objects.
     * @param {object} document - Must expose `getForgeMetadata().documentId`;
     *   serialized with `JSON.stringify`.
     * @param {Array} chunks - Chunks of the document; serialized with `JSON.stringify`.
     * @returns {Promise<void>}
     * @throws Rethrows any client error after logging it.
     */
    async storeDocument(document, chunks) {
        const { documentId } = document.getForgeMetadata();
        const docPath = MinioDocStore.documentPathFor(documentId);
        const chunksPath = MinioDocStore.chunksPathFor(documentId);
        try {
            await this.ensureBucketReady();
            await Promise.all([
                this.client.putObject(this.bucketName, docPath, Buffer.from(JSON.stringify(document))),
                this.client.putObject(this.bucketName, chunksPath, Buffer.from(JSON.stringify(chunks))),
            ]);
        }
        catch (error) {
            console.error("Error storing document:", error);
            throw error;
        }
    }

    /**
     * Loads and parses the stored document.
     * @param {string} documentId
     * @returns {Promise<any>} The parsed JSON of the stored document.
     * @throws Rethrows any client or JSON parsing error after logging it.
     */
    async retrieveDocument(documentId) {
        const docPath = MinioDocStore.documentPathFor(documentId);
        try {
            await this.ensureBucketReady();
            const stream = await this.client.getObject(this.bucketName, docPath);
            return JSON.parse(await this.streamToString(stream));
        }
        catch (error) {
            console.error("Error retrieving document:", error);
            throw error;
        }
    }

    /**
     * Overwrites an existing stored document.
     *
     * NOTE(review): the original carried a "@TODO: fix - this is broken"
     * marker without details. The blanket `.catch(() => false)` that turned
     * every stat failure into "does not exist" is fixed here; the stored
     * chunks are still not updated by this method - confirm whether that
     * is the remaining issue.
     * @param {object} document - New document content; serialized with `JSON.stringify`.
     * @param {string} documentId
     * @returns {Promise<void>}
     * @throws {Error} When no document is stored under `documentId`.
     */
    async updateDocument(document, documentId) {
        const docPath = MinioDocStore.documentPathFor(documentId);
        try {
            await this.ensureBucketReady();
            if (!(await this.objectExists(docPath))) {
                throw new Error(`Document with ID ${documentId} does not exist.`);
            }
            await this.client.putObject(this.bucketName, docPath, Buffer.from(JSON.stringify(document)));
        }
        catch (error) {
            console.error("Error updating document:", error);
            throw error;
        }
    }

    /**
     * Deletes a document together with its chunks.
     * @param {string} documentId
     * @returns {Promise<void>}
     * @throws {Error} When the document object or the chunks object is missing.
     */
    async deleteDocument(documentId) {
        const docPath = MinioDocStore.documentPathFor(documentId);
        const chunksPath = MinioDocStore.chunksPathFor(documentId);
        try {
            await this.ensureBucketReady();
            // The two existence checks are independent, so run them in parallel.
            const [documentExists, chunksExist] = await Promise.all([
                this.objectExists(docPath),
                this.objectExists(chunksPath),
            ]);
            if (!documentExists || !chunksExist) {
                throw new Error(`Document "${docPath}" does not exist.`);
            }
            await Promise.all([
                this.client.removeObject(this.bucketName, docPath),
                this.client.removeObject(this.bucketName, chunksPath),
            ]);
        }
        catch (error) {
            console.error("Error deleting document:", error);
            throw error;
        }
    }

    //@QUESTION: should we pass in the document or just use the document id?
    /**
     * Loads and parses the chunks stored for a document.
     * @param {object} document - Must expose `getForgeMetadata().documentId`.
     * @returns {Promise<any>} The parsed JSON of the stored chunks.
     * @throws {Error} When the bucket is missing or the stored chunks parse
     *   to a falsy value; client errors are rethrown after logging.
     */
    async retrieveChunks(document) {
        const chunksPath = MinioDocStore.chunksPathFor(document.getForgeMetadata().documentId);
        try {
            await this.ensureBucketReady();
            // Initialization runs once, so the bucket may have been removed
            // since (e.g. through deleteBucket); check explicitly to report
            // that case with a clear message.
            const bucketExists = await this.client.bucketExists(this.bucketName);
            if (!bucketExists) {
                throw new Error(`Bucket "${this.bucketName}" does not exist.`);
            }
            const stream = await this.client.getObject(this.bucketName, chunksPath);
            const chunksObject = JSON.parse(await this.streamToString(stream));
            if (!chunksObject) {
                throw new Error(`No chunks found in bucket "${this.bucketName}"`);
            }
            return chunksObject;
        }
        catch (error) {
            console.error("Error retrieving chunks:", error);
            throw error;
        }
    }

    /**
     * Attaches the chunk text to each embedding result.
     * @param {Array<object>} embeddings - Items carrying a `chunkId`.
     * @param {object} document - Document whose stored chunks are looked up.
     * @returns {Promise<Array<object>>} A copy of every embedding with a
     *   `text` property: the text of the first chunk whose `id` equals
     *   `chunkId`, or "" when there is none.
     */
    async queryFromEmbeddings(embeddings, document) {
        const chunks = await this.retrieveChunks(document);
        // Index chunks by id once instead of scanning the list per embedding.
        // The first chunk wins for duplicate ids, matching Array#find.
        const chunkById = new Map();
        for (const chunk of chunks) {
            if (!chunkById.has(chunk.id)) {
                chunkById.set(chunk.id, chunk);
            }
        }
        return embeddings.map((embedding) => ({
            ...embedding,
            text: chunkById.get(embedding.chunkId)?.text || "",
        }));
    }

    /**
     * Removes the bucket. Waits for initialization first so a still-running
     * bucket creation cannot recreate the bucket after it was removed.
     * @returns {Promise<void>}
     */
    async deleteBucket() {
        await this.ensureBucketReady();
        await this.client.removeBucket(this.bucketName);
    }

    /**
     * Helper: drains a readable stream into a UTF-8 string.
     * @param {AsyncIterable<Buffer|string|Uint8Array>} stream - e.g. a Node.js Readable.
     * @returns {Promise<string>}
     */
    async streamToString(stream) {
        const parts = [];
        for await (const chunk of stream) {
            // Streams with an encoding set yield strings; normalise so
            // Buffer.concat always receives buffers.
            parts.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
        }
        return Buffer.concat(parts).toString("utf8");
    }
}
exports.default = MinioDocStore;