// @forge-ml/rag
// Version: (unspecified)
// A RAG (Retrieval-Augmented Generation) package for Forge ML.
// 86 lines (85 loc) • 2.73 kB
// TypeScript
/**
 * Metadata shape returned by `DocumentClass.getForgeMetadata`.
 */
interface Metadata {
/** Identifier of the document this metadata belongs to. */
documentId: string;
/**
 * Optional page number.
 * NOTE(review): presumably the page the document text came from — confirm
 * against the implementation; nothing in this file sets or reads it.
 */
page?: number;
}
/**
 * Metadata attached to a `Chunk` through its `forgeMetadata` property.
 *
 * NOTE(review): this type is referenced by the exported `Chunk` type but is
 * not itself listed in this file's `export type { ... }` statement.
 */
interface ChunkMetadata {
/** Identifier of the document the chunk belongs to. */
documentId: string;
/** Identifier of the chunk. */
chunkId: string;
}
/**
 * Read-only view of a document: its text plus two kinds of metadata.
 * This is the document type accepted and returned by `DocStore`.
 */
interface DocumentClass {
/** Returns free-form key/value metadata (values are untyped: `any`). */
getUserMetadata: () => Record<string, any>;
/** Returns the structured `Metadata` (document id and optional page). */
getForgeMetadata: () => Metadata;
/** Returns the document's text. */
getText: () => string;
}
/**
 * A piece of document text together with its metadata.
 * Chunks are what `Embedder.embedChunks` embeds and what `DocStore` stores.
 */
interface Chunk {
/**
 * Identifier of the chunk.
 * NOTE(review): presumably equal to `forgeMetadata.chunkId` — confirm; the
 * relationship is not enforced by this type.
 */
id: string;
/** Structured metadata linking the chunk to its document. */
forgeMetadata: ChunkMetadata;
/** Free-form key/value metadata (values are untyped: `any`). */
metadata: Record<string, any>;
/** The chunk's text. */
text: string;
}
/**
 * An embedding vector tagged with the chunk and document it belongs to.
 * Returned by `Embedder.embedChunks` and accepted by
 * `VectorStore.storeEmbeddings`.
 */
interface Embedding {
/** Identifier of the chunk this vector was computed for. */
chunkId: string;
/** The embedding vector. Its length is not constrained by this type. */
embedding: number[];
/** Identifier of the document the chunk belongs to. */
documentId: string;
}
/**
 * A chunk reference with a score, as returned by
 * `VectorStore.queryEmbeddings` and consumed by
 * `DocStore.queryFromEmbeddings`.
 */
interface ScoredEmbedding {
/** Identifier of the matched chunk. */
chunkId: string;
/**
 * Score assigned to the match.
 * NOTE(review): presumably a similarity score; the metric and whether
 * higher or lower is better are not visible here — confirm per store.
 */
score: number;
}
/**
 * A scored chunk including its text, as returned by
 * `DocStore.queryFromEmbeddings`.
 */
type RelevantChunk = {
/** Identifier of the chunk. */
chunkId: string;
/** The chunk's text. */
text: string;
/**
 * Score for the chunk.
 * NOTE(review): presumably carried over from the matching
 * `ScoredEmbedding.score` — confirm against the doc store implementation.
 */
score: number;
};
/**
 * Contract for a store of embedding vectors. Both operations are
 * asynchronous.
 */
interface VectorStore {
/** Stores the given embeddings; resolves with no value. */
storeEmbeddings: (embeddings: Embedding[]) => Promise<void>;
/**
 * Queries the store with an embedding vector and resolves with scored
 * chunk references.
 * NOTE(review): `k` is presumably the maximum number of results to return;
 * result ordering is not specified by this type — confirm per store.
 */
queryEmbeddings: (query: number[], k: number) => Promise<ScoredEmbedding[]>;
}
/**
 * Contract for a store of documents and their chunks.
 *
 * Design notes — what do we need from a doc store?
 *
 * - CRUD on documents.
 * - Store each document's chunks?
 *
 * For now, let's just say we store a single document at a time, so we can
 * CRUD over one doc. Later on we need to add document ids and a way to
 * provide them to the user.
 *
 * The user "initializes a document" from the ragger: the document and its
 * chunks get stored in the doc store, while the embeddings get stored in the
 * vector store.
 *
 * Use the document id from the vector store to get chunks.
 */
type DocStore = {
/** Stores a document together with its chunks. */
storeDocument: (document: DocumentClass, chunks: Chunk[]) => Promise<void>;
/**
 * Resolves with the document for the given id.
 * NOTE(review): behaviour for an unknown id (reject vs. other) is not
 * expressed by this type — confirm per implementation.
 */
retrieveDocument: (documentId: string) => Promise<DocumentClass>;
/** Updates the document identified by `documentId` using `document`. */
updateDocument: (document: DocumentClass, documentId: string) => Promise<void>;
/** Deletes the document identified by `documentId`. */
deleteDocument: (documentId: string) => Promise<void>;
/** Resolves with the chunks for the given document. */
retrieveChunks: (document: DocumentClass) => Promise<Chunk[]>;
/**
 * Resolves scored embeddings (e.g. from `VectorStore.queryEmbeddings`) into
 * chunks with text, for the given document.
 */
queryFromEmbeddings: (embeddings: ScoredEmbedding[], document: DocumentClass) => Promise<RelevantChunk[]>;
};
/**
 * Bundles the two stores: one for embedding vectors and one for
 * documents/chunks.
 */
type StoresClass = {
/** Store for embedding vectors. */
vectorStore: VectorStore;
/** Store for documents and their chunks. */
docStore: DocStore;
};
/**
 * Contract for something that turns text into embedding vectors. Both
 * operations are asynchronous.
 */
interface Embedder {
/** Resolves with the embedding vector for a single piece of text. */
generateEmbedding: (text: string) => Promise<number[]>;
/**
 * Resolves with embeddings for the given chunks.
 * NOTE(review): `documentId` is presumably copied into each resulting
 * `Embedding.documentId` — confirm against the implementation.
 */
embedChunks: (chunks: Chunk[], documentId: string) => Promise<Embedding[]>;
}
/**
 * Options describing which embedder to use and the API key to use with it.
 */
type EmbedderOptions = {
/** Embedder kind; only `"openai"` and `"nomic"` are accepted. */
type: "openai" | "nomic";
/** API key (required). NOTE(review): presumably for the provider selected by `type` — confirm. */
apiKey: string;
};
/**
 * String enum naming the available chunking strategies. It is the
 * discriminant of `InitializeDocumentOptions`.
 *
 * NOTE(review): the exact splitting rules behind each member are implemented
 * elsewhere; the member names suggest the unit of splitting — confirm there.
 */
declare enum ChunkingStrategy {
BY_PARAGRAPH = "by_paragraph",
BY_SENTENCE = "by_sentence",
BY_ITEM_IN_LIST = "by_item_in_list",
// The only strategy for which `InitializeDocumentOptions` requires a `delimiter`.
BY_CUSTOM_DELIMITER = "by_custom_delimiter"
}
/**
 * Options for initializing a document, discriminated on `strategy`:
 *
 * - any strategy except `BY_CUSTOM_DELIMITER`: `delimiter` must be omitted
 *   (or `undefined`);
 * - `BY_CUSTOM_DELIMITER`: `delimiter` is a required string.
 */
type InitializeDocumentOptions = {
// Every ChunkingStrategy member other than BY_CUSTOM_DELIMITER.
strategy: Exclude<ChunkingStrategy, ChunkingStrategy.BY_CUSTOM_DELIMITER>;
// Typed as `undefined` so that passing a delimiter with these strategies is a type error.
delimiter?: undefined;
} | {
strategy: ChunkingStrategy.BY_CUSTOM_DELIMITER;
// The custom delimiter string; required for this strategy.
delimiter: string;
};
// `ChunkingStrategy` is an enum, so it is exported as a value (usable at runtime).
export { ChunkingStrategy };
// Everything else is exported type-only.
// NOTE(review): `ChunkMetadata` (the type of `Chunk.forgeMetadata`) is not in this list — confirm whether that is intentional.
export type { Chunk, DocumentClass, Embedding, ScoredEmbedding, VectorStore, Embedder, EmbedderOptions, InitializeDocumentOptions, StoresClass, DocStore, RelevantChunk, Metadata, };