@llm-tools/embedjs
A NodeJS RAG framework to easily work with LLMs and custom datasets
import { RAGApplicationBuilder } from './rag-application-builder.js';
import { AddLoaderReturn, BaseLoader, Chunk, QueryResponse } from '@llm-tools/embedjs-interfaces';
export declare class RAGApplication {
private readonly debug;
private readonly storeConversationsToDefaultThread;
private readonly embeddingRelevanceCutOff;
private readonly searchResultCount;
private readonly systemMessage;
private readonly vectorDatabase;
private readonly embeddingModel;
private readonly store;
private loaders;
private model;
constructor(llmBuilder: RAGApplicationBuilder);
/**
* The function initializes various components of a language model using provided configurations
* and data. This is an internal method and does not need to be invoked manually.
* @param {RAGApplicationBuilder} llmBuilder - An instance of the `RAGApplicationBuilder` class that
* carries the configuration (model, embedding model, vector database, etc.) used to initialize the
* application.
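*
* A minimal construction sketch (illustrative only; the builder methods shown here, `setModel`,
* `setEmbeddingModel`, `setVectorDatabase` and `build`, are assumptions based on the embedjs
* documentation and are not declared in this file):
* @example
* // `init` is invoked for you by the builder's `build()` call (assumed behaviour)
* const app = await new RAGApplicationBuilder()
*     .setModel(someModel)               // assumption: a BaseModel instance or SIMPLE_MODELS value
*     .setEmbeddingModel(someEmbeddings) // assumption: an embedjs embedding model instance
*     .setVectorDatabase(someVectorDb)   // assumption: an embedjs vector database instance
*     .build();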
*/
init(llmBuilder: RAGApplicationBuilder): Promise<void>;
/**
* The function `getModel` resolves a `BaseModel` instance from the input provided.
* @param {BaseModel | SIMPLE_MODELS | null} model - The model to resolve: an existing `BaseModel`
* instance, a value from the `SIMPLE_MODELS` enum, or `null`.
* @returns A Promise that resolves to a `BaseModel` object. If the `model` parameter is already a
* `BaseModel` instance, it is returned as-is. If the `model` parameter is `null`, `null` is
* returned. If the `model` parameter is a value from the `SIMPLE_MODELS` enum, a new `BaseModel`
* is created for that model name.
*/
private getModel;
/**
* The function `embedChunks` embeds the content of chunks by invoking the configured embedding model.
* @param {Pick<Chunk, 'pageContent'>[]} chunks - The `chunks` parameter is an array of objects
* that have a property `pageContent` which contains text content for each chunk.
* @returns The `embedChunks` function is returning the embedded vectors for the chunks.
*/
private embedChunks;
/**
* The function `getChunkUniqueId` generates a unique identifier by combining a loader unique ID and
* an increment ID.
* @param {string} loaderUniqueId - A unique identifier for the loader.
* @param {number} incrementId - The `incrementId` parameter is a number that represents the
* increment value used to generate a unique chunk identifier.
* @returns The function `getChunkUniqueId` returns a string that combines the `loaderUniqueId` and
* `incrementId`.
*/
private getChunkUniqueId;
/**
* The function `addLoader` asynchronously initializes a loader using the provided parameters and adds
* it to the system.
* @param {BaseLoader} loaderParam - An instance of `BaseLoader` that contains the necessary
* information to load the data source.
* @param {boolean} forceReload - A boolean indicating whether the loader should be reloaded.
* By default, loaders that have previously been run are not reloaded.
* @returns The function `addLoader` returns an object with the following properties:
* - `entriesAdded`: Number of new entries added during the loader operation
* - `uniqueId`: Unique identifier of the loader
* - `loaderType`: Name of the loader's constructor class
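*
* Illustrative sketch (assumes an already-built `RAGApplication` instance named `app`; the
* variable `someLoader` stands in for any concrete `BaseLoader` instance):
* @example
* const { entriesAdded, uniqueId, loaderType } = await app.addLoader(someLoader);
* console.log(`Added ${entriesAdded} entries via ${loaderType} (${uniqueId})`);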
*/
addLoader(loaderParam: BaseLoader, forceReload?: boolean): Promise<AddLoaderReturn>;
/**
* The function `_addLoader` asynchronously adds a loader, processes its chunks, and handles
* incremental loading if supported by the loader.
* @param {BaseLoader} loader - The `loader` parameter in the `_addLoader` method is an instance of the
* `BaseLoader` class.
* @returns The function `_addLoader` returns an object with the following properties:
* - `entriesAdded`: Number of new entries added during the loader operation
* - `uniqueId`: Unique identifier of the loader
* - `loaderType`: Name of the loader's constructor class
*/
private _addLoader;
/**
* The `incrementalLoader` function asynchronously processes incremental chunks for a loader.
* @param {string} uniqueId - The `uniqueId` parameter is a string that serves as an identifier for
* the loader.
* @param incrementalGenerator - The `incrementalGenerator` parameter is an asynchronous generator
* function that yields `LoaderChunk` objects. It is used to incrementally load chunks of data for a specific loader.
*/
private incrementalLoader;
/**
* The function `getLoaders` asynchronously retrieves a list of loaders loaded so far. This includes
* internal loaders that were loaded by other loaders. It requires that a cache is enabled in order to work.
* @returns The list of loaders with some metadata about them.
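*
* Illustrative sketch (assumes an already-built `RAGApplication` instance named `app`; only the
* list length is used since the exact `LoaderListEntry` fields are not shown in this file):
* @example
* const loaders = await app.getLoaders();
* console.log(`${loaders.length} loaders (including internal ones) have been added so far`);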
*/
getLoaders(): Promise<import("@llm-tools/embedjs-interfaces").LoaderListEntry[]>;
/**
* The function `batchLoadChunks` processes chunks of data in batches and formats them for insertion.
* @param {string} uniqueId - The `uniqueId` parameter is a string that represents a unique
* identifier for the loader being processed.
* @param generator - The `generator` parameter is an asynchronous generator that yields
* `LoaderChunk` objects.
* @returns The `batchLoadChunks` function returns an object with two properties:
* 1. `newInserts`: The total number of new inserts made during the batch loading process.
* 2. `formattedChunks`: An array containing the formatted chunks that were processed during the
* batch loading process.
*/
private batchLoadChunks;
/**
* The function `batchLoadEmbeddings` asynchronously loads embeddings for formatted chunks and
* inserts them into a vector database.
* @param {string} loaderUniqueId - The `loaderUniqueId` parameter is a unique identifier for the
* loader that is used to load embeddings.
* @param {Chunk[]} formattedChunks - `formattedChunks` is an array of Chunk objects that contain
* page content, metadata, and other information needed for processing. The `batchLoadEmbeddings`
* function processes these chunks in batches to obtain embeddings for each chunk and then inserts
* them into a database for further use.
* @returns The function `batchLoadEmbeddings` returns the result of inserting the embedded chunks
* into the vector database.
*/
private batchLoadEmbeddings;
/**
* The function `getEmbeddingsCount` returns the count of embeddings stored in a vector database
* asynchronously.
* @returns A Promise that resolves to the number of embeddings currently stored in the
* vector database.
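*
* Illustrative sketch (assumes an already-built `RAGApplication` instance named `app`):
* @example
* const count = await app.getEmbeddingsCount();
* console.log(`The vector database currently holds ${count} embeddings`);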
*/
getEmbeddingsCount(): Promise<number>;
/**
* The function `deleteConversation` deletes all entries related to a particular conversation from the database
* @param {string} conversationId - The `conversationId` that you want to delete. Pass 'default' to delete
* the default conversation thread that is created and maintained automatically
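*
* Illustrative sketch (assumes an already-built `RAGApplication` instance named `app`):
* @example
* // delete the automatically maintained default conversation thread
* await app.deleteConversation('default');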
*/
deleteConversation(conversationId: string): Promise<void>;
/**
* The function `deleteLoader` deletes the embeddings added by a specific loader from the vector database.
* @param {string} uniqueLoaderId - The `uniqueLoaderId` parameter is a string that represents the
* identifier of the loader that you want to delete.
* @returns The `deleteLoader` method returns a boolean value indicating the success of the operation.
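*
* Illustrative sketch (assumes `app` is an already-built `RAGApplication` and `someLoader` is any
* concrete `BaseLoader`; the unique id comes from the earlier `addLoader` call):
* @example
* const { uniqueId } = await app.addLoader(someLoader);
* const deleted = await app.deleteLoader(uniqueId);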
*/
deleteLoader(uniqueLoaderId: string): Promise<boolean>;
/**
* The function `reset` deletes all embeddings from the vector database.
* @returns The `reset` function returns a boolean value indicating whether the operation succeeded.
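*
* Illustrative sketch (assumes an already-built `RAGApplication` instance named `app`):
* @example
* // removes every embedding from the configured vector database
* const ok = await app.reset();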
*/
reset(): Promise<boolean>;
/**
* The function `getEmbeddings` retrieves embeddings for a query, performs similarity search,
* filters and sorts the results based on relevance score, and returns a subset of the top results.
* @param {string} cleanQuery - The `cleanQuery` parameter is a string that represents the query
* input after it has been cleaned or processed to remove any unnecessary characters, symbols, or
* noise. This clean query is then used to generate embeddings for similarity search.
* @returns The `getEmbeddings` function returns an array of search results ranked by their
* similarity to the embedded query. The results are filtered by the relevance cutoff value,
* sorted in descending order of score, and then sliced to return only the number of results
* specified by the `searchResultCount` property.
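*
* Illustrative sketch (assumes an already-built `RAGApplication` instance named `app` and that
* each `ExtractChunkData` entry carries the chunk's `pageContent` and a relevance `score`, as the
* description above implies):
* @example
* const results = await app.getEmbeddings('how are loaders added?');
* for (const result of results) console.log(result.score, result.pageContent);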
*/
getEmbeddings(cleanQuery: string): Promise<import("@llm-tools/embedjs-interfaces").ExtractChunkData[]>;
/**
* The `search` function retrieves the unique embeddings for a given query without calling an LLM.
* @param {string} query - The `query` parameter is a string that represents the input query that
* needs to be processed.
* @returns An array of unique page content items / chunks.
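*
* Illustrative sketch (assumes an already-built `RAGApplication` instance named `app` and that
* each returned chunk exposes `pageContent`, as described above; no LLM is called):
* @example
* const chunks = await app.search('vector databases');
* console.log(chunks.map((chunk) => chunk.pageContent));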
*/
search(query: string): Promise<import("@llm-tools/embedjs-interfaces").ExtractChunkData[]>;
/**
* This function takes a user query, retrieves relevant context, identifies unique sources, and
* returns the query result along with the list of sources.
* @param {string} userQuery - The `userQuery` parameter is a string that represents the query
* input provided by the user. It is used as input to retrieve context and ultimately generate a
* result based on the query.
* @param [options] - The `options` parameter in the `query` function is an optional object that
* can have the following properties:
* - conversationId - The `conversationId` parameter in the `query` method is an
* optional parameter that represents the unique identifier for a conversation. It allows you to
* track and associate the query with a specific conversation thread if needed. If provided, it can be
* used to maintain context or history related to the conversation.
* - customContext - You can pass in custom context from your own RAG stack. Passing
* your own context will disable the inbuilt RAG retrieval for that specific query.
* @returns The `query` method returns a Promise that resolves to an object with two properties:
* `result` and `sources`. The `result` property is a string representing the result of querying
* the LLM with the provided query template, user query, context, and conversation history. The
* `sources` property is an array of strings representing unique sources used to generate the LLM response.
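*
* Illustrative sketch (assumes an already-built `RAGApplication` instance named `app`; the
* `result` and `sources` fields are taken from the description above):
* @example
* const response = await app.query('What is a RAG pipeline?', { conversationId: 'user-1' });
* console.log(response.result);
* console.log(response.sources);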
*/
query(userQuery: string, options?: {
conversationId?: string;
customContext?: Chunk[];
}): Promise<QueryResponse>;
}