UNPKG

mongodb-chatbot-server

Version:

A chatbot server for retrieval augmented generation (RAG).

130 lines 5.52 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.includeChunksForMaxTokensPossible = exports.makeRagGenerateUserPrompt = void 0; const common_tags_1 = require("common-tags"); const utils_1 = require("../utils"); const makeDefaultReferenceLinks_1 = require("./makeDefaultReferenceLinks"); const DEFAULT_MAX_CONTEXT_TOKENS = 1500; // magic number for max context tokens for LLM /** Construct a {@link GenerateUserPromptFunc} function that uses retrieval augmented generation (RAG) to generate the user prompt and return references to use in the answer. The returned RAG user prompt generator performs the following steps: 1. Preprocess the user's message using the query preprocessor. 2. Find content using vector search. 3. Removes any chunks that would exceed the max context tokens. 4. Generate the user message using the make user message function. 5. Return the user message and references. */ function makeRagGenerateUserPrompt({ queryPreprocessor, findContent, makeReferenceLinks = makeDefaultReferenceLinks_1.makeDefaultReferenceLinks, maxChunkContextTokens = DEFAULT_MAX_CONTEXT_TOKENS, makeUserMessage, }) { return async ({ userMessageText, conversation, reqId }) => { // --- PREPROCESS --- const preprocessResult = preProcessUserMessage ? await preProcessUserMessage({ queryPreprocessor, userMessageText, conversation, reqId, }) : undefined; const { rejectQuery, query: preprocessedUserMessageContent } = preprocessResult ?? { rejectQuery: false, query: userMessageText, }; if (rejectQuery) { (0, utils_1.logRequest)({ reqId, message: "Preprocessor rejected query", }); return { rejectQuery: true, userMessage: { role: "user", content: userMessageText }, }; } // --- VECTOR SEARCH / RETRIEVAL --- const findContentQuery = preprocessedUserMessageContent ?? userMessageText; const { content, queryEmbedding } = await findContent({ query: findContentQuery, }); if (content.length === 0) { (0, utils_1.logRequest)({ reqId, message: "No matching content found", }); return { userMessage: { role: "user", content: userMessageText, embedding: queryEmbedding, }, rejectQuery: true, }; } (0, utils_1.logRequest)({ reqId, message: (0, common_tags_1.stripIndents) `Chunks found: ${JSON.stringify(content.map(({ chunkAlgoHash, embeddings, ...wantedProperties }) => wantedProperties))}`, }); const references = makeReferenceLinks(content); const includedContent = includeChunksForMaxTokensPossible({ maxTokens: maxChunkContextTokens, content, }); const userMessage = await makeUserMessage({ content: includedContent, originalUserMessage: userMessageText, preprocessedUserMessage: preprocessedUserMessageContent, queryEmbedding, rejectQuery, }); (0, utils_1.logRequest)({ reqId, message: `Latest message sent to LLM: ${JSON.stringify({ role: userMessage.role, content: userMessage.content, })}`, }); return { userMessage, references, rejectQuery: false, }; }; } exports.makeRagGenerateUserPrompt = makeRagGenerateUserPrompt; async function preProcessUserMessage({ queryPreprocessor, userMessageText, conversation, reqId, }) { // Try to preprocess the user's message. If the user's message cannot be preprocessed // (likely due to LLM timeout), then we will just use the original message. if (!queryPreprocessor) { return undefined; } try { const { query, rejectQuery } = await queryPreprocessor({ query: userMessageText, messages: conversation?.messages, }); (0, utils_1.logRequest)({ reqId, message: (0, common_tags_1.stripIndents) `Successfully preprocessed user query. Original query: ${userMessageText} Preprocessed query: ${query}`, }); return { query: query ?? userMessageText, rejectQuery }; } catch (err) { (0, utils_1.logRequest)({ reqId, type: "error", message: `Error preprocessing query: ${JSON.stringify(err)}. Using original query: ${userMessageText}`, }); } } /** This function returns the chunks that can fit in the maxTokens. It limits the number of tokens that are sent to the LLM. */ function includeChunksForMaxTokensPossible({ maxTokens, content, }) { let total = 0; const fitRangeEndIndex = content.findIndex(({ tokenCount }) => (total += tokenCount) > maxTokens); return fitRangeEndIndex === -1 ? content : content.slice(0, fitRangeEndIndex); } exports.includeChunksForMaxTokensPossible = includeChunksForMaxTokensPossible; //# sourceMappingURL=makeRagGenerateUserPrompt.js.map