UNPKG

@elimeleth/builderbot-langchain

Version:

Interface para crear chatbot con Builderbot & LangChain

515 lines (493 loc) 17.8 kB
'use strict';

const bot = require('@builderbot/bot');
require('dotenv/config');
const contextual_compression = require('langchain/retrievers/contextual_compression');
const embeddings_filter = require('langchain/retrievers/document_compressors/embeddings_filter');
const document_compressors = require('langchain/retrievers/document_compressors');
const fastEmbedding = require('@builderbot-plugins/fast-embedding');
const prompts = require('@langchain/core/prompts');
const messages = require('@langchain/core/messages');
const runnables = require('@langchain/core/runnables');
const output_parsers = require('@langchain/core/output_parsers');
const openai = require('@langchain/openai');
const googleGenai = require('@langchain/google-genai');
const axios = require('axios');
const documents = require('@langchain/core/documents');
const directory = require('langchain/document_loaders/fs/directory');
const json = require('langchain/document_loaders/fs/json');
const text = require('langchain/document_loaders/fs/text');
const csv = require('langchain/document_loaders/fs/csv');
const pdf = require('langchain/document_loaders/fs/pdf');
const retrievers = require('@langchain/core/retrievers');
const hnswlib = require('@langchain/community/vectorstores/hnswlib');
const output_parsers$1 = require('langchain/output_parsers');
const z = require('zod');

/**
 * Wraps a base retriever in a ContextualCompressionRetriever whose compressor
 * pipeline consists of a single EmbeddingsFilter.
 */
class ContextualCompression {
    /**
     * @param {object} retriever - Base retriever handed to the compression retriever.
     * @param {{ model?: object, similarityThreshold?: number, k?: number }} [embeddings]
     *   Optional overrides for the embeddings filter.
     */
    constructor(retriever, embeddings) {
        this.retriever = retriever;
        this.embeddings = embeddings;
        // Kept as an instance arrow function so it can be passed around detached.
        this.invoke = async (query) => {
            return await this.contextual.getRelevantDocuments(query);
        };
        this.init();
    }

    /** Builds the compression retriever from the current retriever and pipeline. */
    init() {
        this.contextual = new contextual_compression.ContextualCompressionRetriever({
            baseCompressor: this.compressorPipeline,
            baseRetriever: this.retriever,
        });
    }

    /** A new EmbeddingsFilter built from the configured (or default) settings. */
    get embeddingFilter() {
        return new embeddings_filter.EmbeddingsFilter({
            embeddings: this.embeddings?.model || new fastEmbedding.FastEmbedding('AllMiniLML6V2'),
            // `??` rather than `||`: an explicit threshold of 0 must not be replaced by the default.
            similarityThreshold: this.embeddings?.similarityThreshold ?? 0.77,
            k: this.embeddings?.k || 5
        });
    }

    /** A new DocumentCompressorPipeline containing only the embeddings filter. */
    get compressorPipeline() {
        return new document_compressors.DocumentCompressorPipeline({
            transformers: [this.embeddingFilter]
        });
    }
}

const SYSTEM_STRUCT = `Based on the conversation history: {history} if you don't know the answer, just only return null. Answer the users question as best as possible. {format_instructions}`;

const PROMT = prompts.ChatPromptTemplate.fromMessages([
    ["system", SYSTEM_STRUCT],
    new prompts.MessagesPlaceholder("history"),
    ["human", "{question}"],
]);

const SYSTEM_RAG = `Your task is to answer the users question based on the provided context. only answer the question if you can and only describe the product if you can DON'T sell the product and dont sugests the price's product. Use the following pieces of retrieved context to answer the question. {context} if the product is not in the context, but there is a products that is related to it, answer the question based on the products. else answer the question based on the context. return a response in the language {language} and lowercase YOU DON'T NOT QUESTIONS ONLY ANSWER AND SUGGESTES AND DON'T COMMENT PRICE'S PRODUCTS Answer the users question as best as possible. {format_instructions}`;

const SYSTEM_PROMPT = prompts.ChatPromptTemplate.fromMessages([
    ["system", SYSTEM_RAG],
    new prompts.MessagesPlaceholder("history"),
    ["human", "{question}"],
]);

/**
 * Conversation history kept under the `memory` key of a state object that
 * exposes `get(key)` and `update(patch)`.
 */
class MemoryHistory {
    constructor() {
        /**
         * Appends one user/assistant exchange to the stored history.
         * @param {{ user: string, assistant: string }} inside
         * @param {object} _state
         */
        this.memory = async (inside, _state) => {
            const memory = _state.get('memory') ?? [];
            memory.push(
                new messages.HumanMessage({
                    content: [{ type: 'text', text: inside.user }]
                }),
                new messages.AIMessage({
                    content: [{ type: 'text', text: inside.assistant }]
                })
            );
            await _state.update({ memory });
        };

        /**
         * Trims the stored history to its last `k` entries, persists the trimmed
         * list and returns it flattened one level.
         * @param {object} _state
         * @param {number} [k=4]
         */
        this.getMemory = async (_state, k = 4) => {
            const memory = _state.get('memory') ?? [];
            const limitHistory = memory.slice(-k);
            await _state.update({ memory: limitHistory });
            return limitHistory.flat();
        };

        /**
         * Empties the stored history.
         *
         * The history is written through `_state.update` everywhere else in this
         * class, so it is cleared the same way; the direct property assignment is
         * only a fallback for objects that have no `update` method.
         * @param {object} _state
         */
        this.clearMemory = async (_state) => {
            if (typeof _state?.update === 'function') {
                await _state.update({ memory: [] });
                return;
            }
            _state['memory'] = [];
        };
    }
}

const Memory = new MemoryHistory();

/** Runs a prompt + model chain with a fixed retrieval context. */
class Runnable {
    /**
     * @param {object} model - Chat model; may expose `withStructuredOutput`.
     * @param {string} [prompt] - Optional template that replaces SYSTEM_PROMPT.
     */
    constructor(model, prompt) {
        this.model = model;
        this.prompt = prompt;
    }

    /**
     * Invokes the chain with `context` injected next to `invokeParams`.
     * @param {string} context - Text exposed to the prompt as `context`.
     * @param {object} invokeParams - Remaining prompt variables.
     * @param {object} schema - Schema passed to `withStructuredOutput` when available.
     * @returns {Promise<object>} Result of the chain invocation.
     */
    async retriever(context, invokeParams, schema) {
        const basePrompt = this.prompt
            ? prompts.ChatPromptTemplate.fromTemplate(this.prompt)
            : SYSTEM_PROMPT;
        // Models without structured output fall back to parsing JSON from the reply.
        const chain = this.model?.withStructuredOutput
            ? basePrompt.pipe(this.model.withStructuredOutput(schema))
            : basePrompt.pipe(this.model.pipe(new output_parsers.JsonOutputParser()));
        return runnables.RunnableSequence.from([
            runnables.RunnablePassthrough.assign({
                context: async () => context
            }),
            chain
        ]).invoke(invokeParams);
    }
}

/** Creates and holds the chat model selected by an `{ modelName, args }` config. */
class FactoryModel {
    /**
     * @param {{ modelName?: string, args?: object }} [ai]
     */
    constructor(ai) {
        this.ai = ai;
        this.initModel(ai?.modelName, ai?.args);
    }

    // NOTE(review): this reports 'openai' when no model name is configured, while
    // initModel() creates a Gemini model in that case — confirm which is intended.
    get instance() {
        return this.ai?.modelName || 'openai';
    }

    /**
     * Builds a [system, human] message pair by filling SYSTEM_STRUCT by hand.
     * @param {{ question: string, history?: unknown, format_instructions?: string }} invokeParams
     */
    createTemplateMessage(invokeParams) {
        const question = new messages.HumanMessage({
            content: [
                { type: "text", text: invokeParams.question },
            ]
        });
        // Function replacers insert the text verbatim; a plain string replacement
        // would interpret `$&`, `$$`, etc. that may occur in user text or history.
        const system = SYSTEM_STRUCT
            .replace('{question}', () => invokeParams.question)
            .replace('{history}', () => JSON.stringify(invokeParams.history))
            .replace('{format_instructions}', () => invokeParams?.format_instructions || '');
        const template = new messages.SystemMessage({
            content: system,
            name: 'system',
        });
        return [template, question];
    }

    /**
     * Asks the model for a structured answer.
     * @param {object} invokeParams - Prompt variables.
     * @param {object} llmStructuredOutputTool - Schema passed to `withStructuredOutput`.
     */
    async createStructure(invokeParams, llmStructuredOutputTool) {
        if (this.model?.withStructuredOutput) {
            return await PROMT
                .pipe(this.model.withStructuredOutput(llmStructuredOutputTool))
                .invoke(invokeParams);
        }
        return await this.model
            .pipe(new output_parsers.JsonOutputParser())
            .invoke(this.createTemplateMessage(invokeParams));
    }

    /**
     * Instantiates `this.model`: Gemini when `model` is falsy or 'gemini',
     * OpenAI for any other value.
     * @param {string} [model]
     * @param {{ modelName?: string, maxOutputTokens?: number, apikey?: string }} [args]
     */
    initModel(model, args) {
        if (!model || model === 'gemini') {
            this.model = new googleGenai.ChatGoogleGenerativeAI({
                modelName: args?.modelName || 'gemini-pro',
                maxOutputTokens: args?.maxOutputTokens || 2048,
                apiKey: args?.apikey || process.env.GOOGLE_API_KEY
            });
            return;
        }
        this.model = new openai.ChatOpenAI({
            modelName: args?.modelName || 'gpt-3.5-turbo-16k',
            maxTokens: args?.maxOutputTokens || 2048,
            openAIApiKey: args?.apikey || process.env.OPENAI_API_KEY
        });
    }
}

/**
 * Performs a request with axios and returns the response body.
 * When the failure carries a response body, that body is returned instead of
 * throwing; any other error is rethrown.
 * @param {string} url
 * @param {{ headers?: object }} [args]
 */
const httpRequest = async (url, args) => {
    try {
        const { data } = await axios(url, {
            headers: {
                ...args?.headers,
                'Content-Type': 'application/json'
            }
        });
        return data;
    }
    catch (error) {
        if (error?.response?.data) {
            return error.response.data;
        }
        if (error) {
            throw error;
        }
        return [];
    }
};

/**
 * Retriever backed by a vector store that is filled either from a JSON
 * endpoint (URL) or from the files of a local directory.
 */
class StoreRetriever extends retrievers.BaseRetriever {
    /**
     * @param {string} urlOrPath - URL of a JSON array endpoint, or a directory path.
     * @param {string[]} [schema] - Keys kept from each remote record (required for URLs).
     * @param {object} [store] - Vector store class exposing `fromDocuments`; defaults to HNSWLib.
     * @param {object} [embbedgins] - Embeddings instance; defaults to FastEmbedding.
     * @param {object} [fields] - Forwarded to BaseRetriever.
     */
    constructor(urlOrPath, schema, store, embbedgins, fields) {
        super(fields);
        this.urlOrPath = urlOrPath;
        this.schema = schema;
        this.store = store;
        this.embbedgins = embbedgins;
        this.lc_namespace = ["langchain", "retrievers"];
        // Remember the ingest promise so queries can wait for the store to exist.
        this.ingestion = this.ingest();
        // Rethrowing keeps a failed ingest from being silently ignored.
        this.ingestion.then(() => console.log('Ingested')).catch(err => { throw err; });
    }

    /**
     * Loads the documents and replaces `this.store` (a store class) with the
     * populated store instance.
     * @throws {Error} When the store has no `fromDocuments`, when a URL is used
     *   without a schema, or when the endpoint does not return a non-empty array.
     */
    async ingest() {
        const embeddings = this.embbedgins || new fastEmbedding.FastEmbedding('AllMiniLML6V2');
        if (!this.store) {
            this.store = hnswlib.HNSWLib;
        }
        if (!this.store?.fromDocuments) {
            throw new Error('Store must have a fromDocuments method');
        }
        // NOTE(review): the scheme is optional, so bare names such as "docs.txt"
        // also match and are treated as URLs — confirm this is intended.
        const url_re = /^(https?:\/\/)?([\da-z\.-]+)\.([a-z\.]{2,6})([\/\w \.-]*)*\/?$/;
        if (url_re.test(this.urlOrPath)) {
            if (!this.schema) {
                throw new Error('You must set the schema array first');
            }
            const data = await httpRequest(this.urlOrPath, {});
            if (!Array.isArray(data) || !data.length) {
                throw new Error('The data must be an array with at least one element');
            }
            // Keep only the schema keys of every record, as key/value objects.
            const records = data.map((record) => Object.fromEntries(
                Object.entries(record).filter(([key]) => this.schema.includes(key))
            ));
            const remoteDocuments = records.map((record) => new documents.Document({
                pageContent: Object.entries(record).map(([k, v]) => `${k}: ${v}`).join('\n'),
                metadata: record
            }));
            this.store = await this.store.fromDocuments(remoteDocuments, embeddings);
            // Remote ingestion is complete; the directory loader below is only for paths.
            return;
        }
        const loader = new directory.DirectoryLoader(this.urlOrPath, {
            ".json": (path) => new json.JSONLoader(path, "/text"),
            ".jsonl": (path) => new json.JSONLinesLoader(path, "/html"),
            ".txt": (path) => new text.TextLoader(path),
            ".pdf": (path) => new pdf.PDFLoader(path),
            ".csv": (path) => new csv.CSVLoader(path, "text"),
        });
        const localDocuments = await loader.load();
        this.store = await this.store.fromDocuments(localDocuments, embeddings);
    }

    /**
     * Delegates the query to the populated store's retriever.
     * @param {string} query
     */
    async _getRelevantDocuments(query, runManager) {
        // Until ingestion finishes, `this.store` is still a class without `asRetriever`.
        await this.ingestion;
        return await this.store.asRetriever()._getRelevantDocuments(query);
    }
}

/**
 * Asks the model for a structured answer to `question` and records the
 * exchange in the conversation memory.
 * @param {string} question
 * @param {object} schema - Zod schema describing the expected answer.
 * @param {FactoryModel} model
 * @param {object} state
 */
const schemasFn = async (question, schema, model, state) => {
    const responseSchema = await model.createStructure({
        question,
        language: 'spanish',
        history: await Memory.getMemory(state, 4),
        format_instructions: new output_parsers$1.StructuredOutputParser(schema).getFormatInstructions()
    }, schema);
    // Awaited so a failing state update is reported to the caller instead of floating.
    await Memory.memory({ user: question, assistant: JSON.stringify(responseSchema) }, state);
    return responseSchema;
};

/**
 * Static builder for an action callback that exposes the structured answer as
 * `ctx.schema` before delegating to the user callback.
 */
class StructLayer {
}
StructLayer.setZodSchema = (schema) => {
    StructLayer.schema = schema;
    return StructLayer;
};
StructLayer.setAIModel = (ai) => {
    StructLayer.model = new FactoryModel(ai);
    return StructLayer;
};
StructLayer.create = (cb) => {
    if (!StructLayer.schema) {
        throw new Error('You must set the zod schema method first');
    }
    if (!StructLayer.model) {
        StructLayer.model = new FactoryModel();
    }
    // Capture the schema now: the static slot is shared, so a later
    // setZodSchema() call must not change layers that were already created.
    const schema = StructLayer.schema;
    return async (ctx, methods) => {
        try {
            ctx.schema = await schemasFn(ctx.body, schema, StructLayer.model, methods.state);
        }
        catch (error) {
            // Best effort: the callback still runs, with a null schema.
            ctx.schema = null;
        }
        return await cb(ctx, methods);
    };
};

/**
 * Static builder for an action callback that exposes the structured answer as
 * `ctx.context` before delegating to the user callback.
 */
class TransformLayer {
}
TransformLayer.setZodSchema = (schema) => {
    TransformLayer.schema = schema;
    return TransformLayer;
};
TransformLayer.setAIModel = (ai) => {
    TransformLayer.model = new FactoryModel(ai);
    return TransformLayer;
};
TransformLayer.create = (cb) => {
    if (!TransformLayer.schema) {
        throw new Error('You must set the zod schema method first');
    }
    if (!TransformLayer.model) {
        TransformLayer.model = new FactoryModel();
    }
    // Capture the schema now: the static slot is shared, so a later
    // setZodSchema() call must not change layers that were already created.
    const schema = TransformLayer.schema;
    return async (ctx, methods) => {
        try {
            ctx.context = await schemasFn(ctx.body, schema, TransformLayer.model, methods.state);
        }
        catch (error) {
            // Best effort: the callback still runs, with a null context.
            ctx.context = null;
        }
        return await cb(ctx, methods);
    };
};

/** Retriever that turns the records returned by a search function into Documents. */
class CustomRetriever extends retrievers.BaseRetriever {
    /**
     * @param {(query: string) => Promise<object[]>} searchFn
     * @param {object} [fields] - Forwarded to BaseRetriever.
     */
    constructor(searchFn, fields) {
        super(fields);
        this.searchFn = searchFn;
        this.lc_namespace = ["langchain", "retrievers"];
    }

    /**
     * @param {string} query
     * @returns {Promise<object[]>} One Document per record; empty when the
     *   search function does not return an array.
     */
    async _getRelevantDocuments(query, runManager) {
        const results = await this.searchFn(query);
        if (!Array.isArray(results)) {
            return [];
        }
        return results.map((result) => new documents.Document({
            pageContent: Object.entries(result).map(([k, v]) => `${k}: ${v}`).join('\n'),
            metadata: result
        }));
    }
}

/**
 * Static builder for a keyword flow that stores the structured answer under
 * the `schema` key of the flow state.
 */
class StructuredOutput {
}
StructuredOutput.kwrd = bot.addKeyword(bot.EVENTS.ACTION);
StructuredOutput.setKeyword = (ev) => {
    StructuredOutput.kwrd = bot.addKeyword(ev, { sensitive: false });
    return StructuredOutput;
};
StructuredOutput.setZodSchema = (schema) => {
    StructuredOutput.schema = schema;
    return StructuredOutput;
};
StructuredOutput.setAIModel = (ai) => {
    StructuredOutput.model = new FactoryModel(ai);
    return StructuredOutput;
};
StructuredOutput.create = (callbacks) => {
    if (!StructuredOutput.schema) {
        throw new Error('You must set the zod schema method first');
    }
    if (!StructuredOutput.model) {
        StructuredOutput.model = new FactoryModel();
    }
    // Capture the schema now so a later setZodSchema() does not affect this flow.
    const schema = StructuredOutput.schema;
    StructuredOutput.kwrd = callbacks?.beforeStart?.(StructuredOutput.kwrd) || StructuredOutput.kwrd;
    StructuredOutput.kwrd = StructuredOutput.kwrd.addAction(async (ctx, { state }) => {
        try {
            const responseSchema = await schemasFn(ctx.body, schema, StructuredOutput.model, state);
            await state.update({ schema: responseSchema });
        }
        catch (error) {
            callbacks?.onFailure?.(error);
            await state.update({ schema: null });
        }
    });
    StructuredOutput.kwrd = callbacks?.afterEnd?.(StructuredOutput.kwrd) || StructuredOutput.kwrd;
    return StructuredOutput.kwrd;
};

/**
 * Static, chainable builder for a retrieval-augmented keyword flow.
 * All configuration lives in static properties shared by every caller.
 */
class createAIFlow {
    /**
     * Configures contextual compression on top of the current store.
     * @param {number} [k]
     * @param {number} [similarityThreshold]
     * @param {object} [model] - Embeddings model used by the filter.
     * @throws {Error} When no store has been set.
     */
    static setContextual(k, similarityThreshold, model) {
        if (!this.store) {
            throw new Error('You must set the store first');
        }
        this.contextual = new ContextualCompression(this.store, { k, model, similarityThreshold });
        return this;
    }
}
createAIFlow.kwrd = bot.addKeyword(bot.EVENTS.ACTION);
createAIFlow.model = new FactoryModel();
createAIFlow.setKeyword = (ev) => {
    createAIFlow.kwrd = bot.addKeyword(ev, { sensitive: false });
    return createAIFlow;
};
createAIFlow.setAIModel = (ai) => {
    createAIFlow.model = new FactoryModel(ai);
    return createAIFlow;
};
createAIFlow.setZodSchema = (schema) => {
    createAIFlow.schema = schema;
    return createAIFlow;
};
createAIFlow.setStore = (store) => {
    if (!store?.urlOrPath && !store?.searchFn) {
        throw new Error('Either urlOrPath or searchFn must be provided');
    }
    // Decide on the value, not on key presence: `{ urlOrPath: undefined, searchFn }`
    // must use the search function.
    if (store.urlOrPath) {
        createAIFlow.store = new StoreRetriever(store.urlOrPath, store?.schema, store?.store, store?.embbedgins);
    }
    else {
        createAIFlow.store = new CustomRetriever(store.searchFn, store?.fields);
    }
    return createAIFlow;
};
createAIFlow.setCatchLayer = (schema, cb, capture = false) => {
    createAIFlow.kwrd = createAIFlow.kwrd.addAction({ capture }, StructLayer.setZodSchema(schema).create(cb));
    return createAIFlow;
};
createAIFlow.setTransformLayer = (schema, cb, capture = false) => {
    createAIFlow.kwrd = createAIFlow.kwrd.addAction({ capture }, TransformLayer.setZodSchema(schema).create(cb));
    return createAIFlow;
};
createAIFlow.pipe = (fn) => {
    createAIFlow.kwrd = fn(createAIFlow.kwrd);
    return createAIFlow;
};
createAIFlow.createRunnable = (opts, callbacks) => {
    if (!createAIFlow.schema) {
        createAIFlow.schema = opts?.answerSchema || z.object({ answer: z.string().describe('Answer as best possible') });
    }
    if (!createAIFlow.contextual) {
        createAIFlow.contextual = new ContextualCompression(createAIFlow.store);
    }
    const format_instructions = new output_parsers$1.StructuredOutputParser(createAIFlow.schema).getFormatInstructions();
    createAIFlow.kwrd = createAIFlow.kwrd.addAction(async (ctx, { state }) => {
        try {
            // A preceding transform layer may have put a rewritten query on ctx.context.
            const context = await createAIFlow.contextual.invoke(ctx?.context || ctx.body);
            const mapContext = context.map(doc => doc.pageContent).join('\n');
            const answer = await new Runnable(createAIFlow.model.model, opts?.prompt).retriever(mapContext, {
                question: ctx.body,
                language: 'spanish',
                history: await Memory.getMemory(state) || [],
                format_instructions
            }, createAIFlow.schema);
            // Awaited so a failing memory update reaches the catch below instead of floating.
            await Memory.memory({ user: ctx.body, assistant: JSON.stringify(answer) }, state);
            await state.update({ answer });
        }
        catch (error) {
            callbacks?.onFailure?.(error);
            await state.update({ answer: null });
        }
    });
    return createAIFlow;
};
createAIFlow.createFlow = () => {
    return createAIFlow.kwrd;
};

exports.StructFlow = StructuredOutput;
exports.StructLayer = StructLayer;
exports.TransformLayer = TransformLayer;
exports.createAIFlow = createAIFlow;