UNPKG

llamaindex

Version:

<p align="center"> <img height="100" width="100" alt="LlamaIndex logo" src="https://ts.llamaindex.ai/square.svg" /> </p> <h1 align="center">LlamaIndex.TS</h1> <h3 align="center"> Data framework for your LLM application. </h3>

1,352 lines (1,325 loc) 62.8 kB
Object.defineProperty(exports, '__esModule', { value: true }); var prompts = require('@llamaindex/core/prompts'); var schema = require('@llamaindex/core/schema'); var utils = require('@llamaindex/core/utils'); var global = require('@llamaindex/core/global'); var indices = require('@llamaindex/core/indices'); var nodeParser = require('@llamaindex/core/node-parser'); var env = require('@llamaindex/env'); var dataStructs = require('@llamaindex/core/data-structs'); var responseSynthesizers = require('@llamaindex/core/response-synthesizers'); var retriever = require('@llamaindex/core/retriever'); var _ = require('lodash'); var chatEngine = require('@llamaindex/core/chat-engine'); var decorator = require('@llamaindex/core/decorator'); var memory = require('@llamaindex/core/memory'); var queryEngine = require('@llamaindex/core/query-engine'); require('../../selectors/dist/index.cjs'); var indexStore = require('@llamaindex/core/storage/index-store'); var embeddings = require('@llamaindex/core/embeddings'); var vectorStore = require('@llamaindex/core/vector-store'); var docStore = require('@llamaindex/core/storage/doc-store'); var kvStore = require('@llamaindex/core/storage/kv-store'); function _interopDefault (e) { return e && e.__esModule ? e : { default: e }; } var ___default = /*#__PURE__*/_interopDefault(_); /** * @internal */ class GlobalSettings { #prompt; #promptHelper; #nodeParser; #chunkOverlap; #promptHelperAsyncLocalStorage; #nodeParserAsyncLocalStorage; #chunkOverlapAsyncLocalStorage; #promptAsyncLocalStorage; get debug() { return global.Settings.debug; } get llm() { return global.Settings.llm; } set llm(llm) { global.Settings.llm = llm; } withLLM(llm, fn) { return global.Settings.withLLM(llm, fn); } get promptHelper() { if (this.#promptHelper === null) { this.#promptHelper = new indices.PromptHelper(); } return this.#promptHelperAsyncLocalStorage.getStore() ?? this.#promptHelper; } set promptHelper(promptHelper) { this.#promptHelper = promptHelper; } withPromptHelper(promptHelper, fn) { return this.#promptHelperAsyncLocalStorage.run(promptHelper, fn); } get embedModel() { return global.Settings.embedModel; } set embedModel(embedModel) { global.Settings.embedModel = embedModel; } withEmbedModel(embedModel, fn) { return global.Settings.withEmbedModel(embedModel, fn); } get nodeParser() { if (this.#nodeParser === null) { this.#nodeParser = new nodeParser.SentenceSplitter({ chunkSize: this.chunkSize, chunkOverlap: this.chunkOverlap }); } return this.#nodeParserAsyncLocalStorage.getStore() ?? this.#nodeParser; } set nodeParser(nodeParser) { this.#nodeParser = nodeParser; } withNodeParser(nodeParser, fn) { return this.#nodeParserAsyncLocalStorage.run(nodeParser, fn); } get callbackManager() { return global.Settings.callbackManager; } set callbackManager(callbackManager) { global.Settings.callbackManager = callbackManager; } withCallbackManager(callbackManager, fn) { return global.Settings.withCallbackManager(callbackManager, fn); } set chunkSize(chunkSize) { global.Settings.chunkSize = chunkSize; } get chunkSize() { return global.Settings.chunkSize; } withChunkSize(chunkSize, fn) { return global.Settings.withChunkSize(chunkSize, fn); } get chunkOverlap() { return this.#chunkOverlapAsyncLocalStorage.getStore() ?? this.#chunkOverlap; } set chunkOverlap(chunkOverlap) { if (typeof chunkOverlap === "number") { this.#chunkOverlap = chunkOverlap; } } withChunkOverlap(chunkOverlap, fn) { return this.#chunkOverlapAsyncLocalStorage.run(chunkOverlap, fn); } get prompt() { return this.#promptAsyncLocalStorage.getStore() ?? this.#prompt; } set prompt(prompt) { this.#prompt = prompt; } withPrompt(prompt, fn) { return this.#promptAsyncLocalStorage.run(prompt, fn); } constructor(){ this.#prompt = {}; this.#promptHelper = null; this.#nodeParser = null; this.#promptHelperAsyncLocalStorage = new env.AsyncLocalStorage(); this.#nodeParserAsyncLocalStorage = new env.AsyncLocalStorage(); this.#chunkOverlapAsyncLocalStorage = new env.AsyncLocalStorage(); this.#promptAsyncLocalStorage = new env.AsyncLocalStorage(); } } const Settings = new GlobalSettings(); const defaultUserPrompt = new prompts.PromptTemplate({ templateVars: [ "query", "referenceAnswer", "generatedAnswer" ], template: ` ## User Query {query} ## Reference Answer {referenceAnswer} ## Generated Answer {generatedAnswer} ` }); const defaultCorrectnessSystemPrompt = new prompts.PromptTemplate({ template: `You are an expert evaluation system for a question answering chatbot. You are given the following information: - a user query, and - a generated answer You may also be given a reference answer to use for reference in your evaluation. Your job is to judge the relevance and correctness of the generated answer. Output a single score that represents a holistic evaluation. You must return your response in a line with only the score. Do not return answers in any other format. On a separate line provide your reasoning for the score as well. Follow these guidelines for scoring: - Your score has to be between 1 and 5, where 1 is the worst and 5 is the best. - If the generated answer is not relevant to the user query, you should give a score of 1. - If the generated answer is relevant but contains mistakes, you should give a score between 2 and 3. - If the generated answer is relevant and fully correct, you should give a score between 4 and 5. Example Response: 4.0 The generated answer has the exact same metrics as the reference answer but it is not as concise. ` }); const defaultFaithfulnessRefinePrompt = new prompts.PromptTemplate({ templateVars: [ "query", "existingAnswer", "context" ], template: ` We want to understand if the following information is present in the context information: {query} We have provided an existing YES/NO answer: {existingAnswer} We have the opportunity to refine the existing answer (only if needed) with some more context below. ------------ {context} ------------ If the existing answer was already YES, still answer YES. If the information is present in the new context, answer YES. Otherwise answer NO. ` }); const defaultFaithfulnessTextQaPrompt = new prompts.PromptTemplate({ templateVars: [ "context", "query" ], template: ` Please tell if a given piece of information is supported by the context. You need to answer with either YES or NO. Answer YES if any of the context supports the information, even if most of the context is unrelated. Some examples are provided below. Information: Apple pie is generally double-crusted. Context: An apple pie is a fruit pie in which the principal filling ingredient is apples. Apple pie is often served with whipped cream, ice cream ('apple pie à la mode'), custard or cheddar cheese. It is generally double-crusted, with pastry both above and below the filling; the upper crust may be solid or latticed (woven of crosswise strips). Answer: YES Information: Apple pies tastes bad. Context: An apple pie is a fruit pie in which the principal filling ingredient is apples. Apple pie is often served with whipped cream, ice cream ('apple pie à la mode'), custard or cheddar cheese. It is generally double-crusted, with pastry both above and below the filling; the upper crust may be solid or latticed (woven of crosswise strips). Answer: NO Information: {query} Context: {context} Answer: ` }); const defaultRelevancyEvalPrompt = new prompts.PromptTemplate({ templateVars: [ "context", "query" ], template: `Your task is to evaluate if the response for the query is in line with the context information provided. You have two options to answer. Either YES/ NO. Answer - YES, if the response for the query is in line with context information otherwise NO. Query and Response: {query} Context: {context} Answer: ` }); const defaultRelevancyRefinePrompt = new prompts.PromptTemplate({ templateVars: [ "query", "existingAnswer", "contextMsg" ], template: `We want to understand if the following query and response is in line with the context information: {query} We have provided an existing YES/NO answer: {existingAnswer} We have the opportunity to refine the existing answer (only if needed) with some more context below. ------------ {contextMsg} ------------ If the existing answer was already YES, still answer YES. If the information is present in the new context, answer YES. Otherwise answer NO. ` }); const defaultEvaluationParser = (evalResponse)=>{ const [scoreStr, reasoningStr] = evalResponse.split("\n"); const score = parseFloat(scoreStr); const reasoning = reasoningStr.trim(); return [ score, reasoning ]; }; /** Correctness Evaluator */ class CorrectnessEvaluator extends prompts.PromptMixin { constructor(params){ super(), this.correctnessPrompt = defaultCorrectnessSystemPrompt; this.llm = Settings.llm; this.correctnessPrompt = defaultCorrectnessSystemPrompt; this.scoreThreshold = params?.scoreThreshold ?? 4.0; this.parserFunction = params?.parserFunction ?? defaultEvaluationParser; } _getPrompts() { return { correctnessPrompt: this.correctnessPrompt }; } _getPromptModules() { return {}; } _updatePrompts(prompts) { if ("correctnessPrompt" in prompts) { this.correctnessPrompt = prompts["correctnessPrompt"]; } } /** * * @param query Query to evaluate * @param response Response to evaluate * @param contexts Array of contexts * @param reference Reference response */ async evaluate({ query, response, contexts, reference }) { if (query === null || response === null) { throw new Error("query, and response must be provided"); } const messages = [ { role: "system", content: this.correctnessPrompt.format() }, { role: "user", content: defaultUserPrompt.format({ query: utils.extractText(query), generatedAnswer: response, referenceAnswer: reference || "(NO REFERENCE ANSWER SUPPLIED)" }) } ]; const evalResponse = await this.llm.chat({ messages }); const [score, reasoning] = this.parserFunction(utils.extractText(evalResponse.message.content)); return { query: query, response: response, passing: score >= this.scoreThreshold || score === null, score: score, feedback: reasoning }; } /** * @param query Query to evaluate * @param response Response to evaluate */ async evaluateResponse({ query, response }) { const responseStr = utils.extractText(response?.message.content); const contexts = []; if (response) { for (const node of response.sourceNodes || []){ contexts.push(node.node.getContent(schema.MetadataMode.ALL)); } } return this.evaluate({ query, response: responseStr, contexts }); } } function applyDecs2203RFactory() { function createAddInitializerMethod(initializers, decoratorFinishedRef) { return function addInitializer(initializer) { assertNotFinished(decoratorFinishedRef, "addInitializer"); assertCallable(initializer, "An initializer"); initializers.push(initializer); }; } function memberDec(dec, name, desc, initializers, kind, isStatic, isPrivate, metadata, value) { var kindStr; switch(kind){ case 1: kindStr = "accessor"; break; case 2: kindStr = "method"; break; case 3: kindStr = "getter"; break; case 4: kindStr = "setter"; break; default: kindStr = "field"; } var ctx = { kind: kindStr, name: isPrivate ? "#" + name : name, static: isStatic, private: isPrivate, metadata: metadata }; var decoratorFinishedRef = { v: false }; ctx.addInitializer = createAddInitializerMethod(initializers, decoratorFinishedRef); var get, set; if (kind === 0) { if (isPrivate) { get = desc.get; set = desc.set; } else { get = function() { return this[name]; }; set = function(v) { this[name] = v; }; } } else if (kind === 2) { get = function() { return desc.value; }; } else { if (kind === 1 || kind === 3) { get = function() { return desc.get.call(this); }; } if (kind === 1 || kind === 4) { set = function(v) { desc.set.call(this, v); }; } } ctx.access = get && set ? { get: get, set: set } : get ? { get: get } : { set: set }; try { return dec(value, ctx); } finally{ decoratorFinishedRef.v = true; } } function assertNotFinished(decoratorFinishedRef, fnName) { if (decoratorFinishedRef.v) { throw new Error("attempted to call " + fnName + " after decoration was finished"); } } function assertCallable(fn, hint) { if (typeof fn !== "function") { throw new TypeError(hint + " must be a function"); } } function assertValidReturnValue(kind, value) { var type = typeof value; if (kind === 1) { if (type !== "object" || value === null) { throw new TypeError("accessor decorators must return an object with get, set, or init properties or void 0"); } if (value.get !== undefined) { assertCallable(value.get, "accessor.get"); } if (value.set !== undefined) { assertCallable(value.set, "accessor.set"); } if (value.init !== undefined) { assertCallable(value.init, "accessor.init"); } } else if (type !== "function") { var hint; if (kind === 0) { hint = "field"; } else if (kind === 10) { hint = "class"; } else { hint = "method"; } throw new TypeError(hint + " decorators must return a function or void 0"); } } function applyMemberDec(ret, base, decInfo, name, kind, isStatic, isPrivate, initializers, metadata) { var decs = decInfo[0]; var desc, init, value; if (isPrivate) { if (kind === 0 || kind === 1) { desc = { get: decInfo[3], set: decInfo[4] }; } else if (kind === 3) { desc = { get: decInfo[3] }; } else if (kind === 4) { desc = { set: decInfo[3] }; } else { desc = { value: decInfo[3] }; } } else if (kind !== 0) { desc = Object.getOwnPropertyDescriptor(base, name); } if (kind === 1) { value = { get: desc.get, set: desc.set }; } else if (kind === 2) { value = desc.value; } else if (kind === 3) { value = desc.get; } else if (kind === 4) { value = desc.set; } var newValue, get, set; if (typeof decs === "function") { newValue = memberDec(decs, name, desc, initializers, kind, isStatic, isPrivate, metadata, value); if (newValue !== void 0) { assertValidReturnValue(kind, newValue); if (kind === 0) { init = newValue; } else if (kind === 1) { init = newValue.init; get = newValue.get || value.get; set = newValue.set || value.set; value = { get: get, set: set }; } else { value = newValue; } } } else { for(var i = decs.length - 1; i >= 0; i--){ var dec = decs[i]; newValue = memberDec(dec, name, desc, initializers, kind, isStatic, isPrivate, metadata, value); if (newValue !== void 0) { assertValidReturnValue(kind, newValue); var newInit; if (kind === 0) { newInit = newValue; } else if (kind === 1) { newInit = newValue.init; get = newValue.get || value.get; set = newValue.set || value.set; value = { get: get, set: set }; } else { value = newValue; } if (newInit !== void 0) { if (init === void 0) { init = newInit; } else if (typeof init === "function") { init = [ init, newInit ]; } else { init.push(newInit); } } } } } if (kind === 0 || kind === 1) { if (init === void 0) { init = function(instance, init) { return init; }; } else if (typeof init !== "function") { var ownInitializers = init; init = function(instance, init) { var value = init; for(var i = 0; i < ownInitializers.length; i++){ value = ownInitializers[i].call(instance, value); } return value; }; } else { var originalInitializer = init; init = function(instance, init) { return originalInitializer.call(instance, init); }; } ret.push(init); } if (kind !== 0) { if (kind === 1) { desc.get = value.get; desc.set = value.set; } else if (kind === 2) { desc.value = value; } else if (kind === 3) { desc.get = value; } else if (kind === 4) { desc.set = value; } if (isPrivate) { if (kind === 1) { ret.push(function(instance, args) { return value.get.call(instance, args); }); ret.push(function(instance, args) { return value.set.call(instance, args); }); } else if (kind === 2) { ret.push(value); } else { ret.push(function(instance, args) { return value.call(instance, args); }); } } else { Object.defineProperty(base, name, desc); } } } function applyMemberDecs(Class, decInfos, metadata) { var ret = []; var protoInitializers; var staticInitializers; var existingProtoNonFields = new Map(); var existingStaticNonFields = new Map(); for(var i = 0; i < decInfos.length; i++){ var decInfo = decInfos[i]; if (!Array.isArray(decInfo)) continue; var kind = decInfo[1]; var name = decInfo[2]; var isPrivate = decInfo.length > 3; var isStatic = kind >= 5; var base; var initializers; if (isStatic) { base = Class; kind = kind - 5; staticInitializers = staticInitializers || []; initializers = staticInitializers; } else { base = Class.prototype; protoInitializers = protoInitializers || []; initializers = protoInitializers; } if (kind !== 0 && !isPrivate) { var existingNonFields = isStatic ? existingStaticNonFields : existingProtoNonFields; var existingKind = existingNonFields.get(name) || 0; if (existingKind === true || existingKind === 3 && kind !== 4 || existingKind === 4 && kind !== 3) { throw new Error("Attempted to decorate a public method/accessor that has the same name as a previously decorated public method/accessor. This is not currently supported by the decorators plugin. Property name was: " + name); } else if (!existingKind && kind > 2) { existingNonFields.set(name, kind); } else { existingNonFields.set(name, true); } } applyMemberDec(ret, base, decInfo, name, kind, isStatic, isPrivate, initializers, metadata); } pushInitializers(ret, protoInitializers); pushInitializers(ret, staticInitializers); return ret; } function pushInitializers(ret, initializers) { if (initializers) { ret.push(function(instance) { for(var i = 0; i < initializers.length; i++){ initializers[i].call(instance); } return instance; }); } } function applyClassDecs(targetClass, classDecs, metadata) { if (classDecs.length > 0) { var initializers = []; var newClass = targetClass; var name = targetClass.name; for(var i = classDecs.length - 1; i >= 0; i--){ var decoratorFinishedRef = { v: false }; try { var nextNewClass = classDecs[i](newClass, { kind: "class", name: name, addInitializer: createAddInitializerMethod(initializers, decoratorFinishedRef), metadata }); } finally{ decoratorFinishedRef.v = true; } if (nextNewClass !== undefined) { assertValidReturnValue(10, nextNewClass); newClass = nextNewClass; } } return [ defineMetadata(newClass, metadata), function() { for(var i = 0; i < initializers.length; i++){ initializers[i].call(newClass); } } ]; } } function defineMetadata(Class, metadata) { return Object.defineProperty(Class, Symbol.metadata || Symbol.for("Symbol.metadata"), { configurable: true, enumerable: true, value: metadata }); } return function applyDecs2203R(targetClass, memberDecs, classDecs, parentClass) { if (parentClass !== void 0) { var parentMetadata = parentClass[Symbol.metadata || Symbol.for("Symbol.metadata")]; } var metadata = Object.create(parentMetadata === void 0 ? null : parentMetadata); var e = applyMemberDecs(targetClass, memberDecs, metadata); if (!classDecs.length) defineMetadata(targetClass, metadata); return { e: e, get c () { return applyClassDecs(targetClass, classDecs, metadata); } }; }; } function _apply_decs_2203_r(targetClass, memberDecs, classDecs, parentClass) { return (_apply_decs_2203_r = applyDecs2203RFactory())(targetClass, memberDecs, classDecs, parentClass); } var _initProto; /** * CondenseQuestionChatEngine is used in conjunction with a Index (for example VectorStoreIndex). * It does two steps on taking a user's chat message: first, it condenses the chat message * with the previous chat history into a question with more context. * Then, it queries the underlying Index using the new question with context and returns * the response. * CondenseQuestionChatEngine performs well when the input is primarily questions about the * underlying data. It performs less well when the chat messages are not questions about the * data, or are very referential to previous context. */ class CondenseQuestionChatEngine extends chatEngine.BaseChatEngine { static{ ({ e: [_initProto] } = _apply_decs_2203_r(this, [ [ decorator.wrapEventCaller, 2, "chat" ] ], [])); } get chatHistory() { return this.memory.getLLM(); } constructor(init){ super(), _initProto(this); this.queryEngine = init.queryEngine; this.memory = memory.createMemory(init.chatHistory); this.llm = Settings.llm; this.condenseMessagePrompt = init?.condenseMessagePrompt ?? prompts.defaultCondenseQuestionPrompt; } _getPromptModules() { return {}; } _getPrompts() { return { condenseMessagePrompt: this.condenseMessagePrompt }; } _updatePrompts(promptsDict) { if (promptsDict.condenseMessagePrompt) { this.condenseMessagePrompt = promptsDict.condenseMessagePrompt; } } async condenseQuestion(chatHistory, question) { const chatHistoryStr = utils.messagesToHistory(await chatHistory.getLLM()); return this.llm.complete({ prompt: this.condenseMessagePrompt.format({ question: question, chatHistory: chatHistoryStr }) }); } async chat(params) { const { message, stream } = params; const chatHistory = params.chatHistory ? params.chatHistory instanceof memory.Memory ? params.chatHistory : memory.createMemory(params.chatHistory) : this.memory; const condensedQuestion = (await this.condenseQuestion(chatHistory, utils.extractText(message))).text; await chatHistory.add({ content: message, role: "user" }); if (stream) { const stream = await this.queryEngine.query({ query: condensedQuestion, stream: true }); return utils.streamReducer({ stream, initialValue: "", reducer: (accumulator, part)=>accumulator += utils.extractText(part.message.content), finished: (accumulator)=>{ void chatHistory.add({ content: accumulator, role: "assistant" }); } }); } const response = await this.queryEngine.query({ query: condensedQuestion }); await chatHistory.add({ content: response.message.content, role: "assistant" }); return response; } reset() { void this.memory.clear(); } } // FS utility helpers /** * Checks if a file exists. * Analogous to the os.path.exists function from Python. * @param path The path to the file to check. * @returns A promise that resolves to true if the file exists, false otherwise. */ async function exists(path) { try { await env.fs.access(path); return true; } catch { return false; } } const LEARNER_MODES = new Set([ vectorStore.VectorStoreQueryMode.SVM, vectorStore.VectorStoreQueryMode.LINEAR_REGRESSION, vectorStore.VectorStoreQueryMode.LOGISTIC_REGRESSION ]); const MMR_MODE = vectorStore.VectorStoreQueryMode.MMR; // Mapping of filter operators to metadata filter functions const OPERATOR_TO_FILTER = { [vectorStore.FilterOperator.EQ]: ({ key, value }, metadata)=>{ return metadata[key] === vectorStore.parsePrimitiveValue(value); }, [vectorStore.FilterOperator.NE]: ({ key, value }, metadata)=>{ return metadata[key] !== vectorStore.parsePrimitiveValue(value); }, [vectorStore.FilterOperator.IN]: ({ key, value }, metadata)=>{ return !!vectorStore.parseArrayValue(value).find((v)=>metadata[key] === v); }, [vectorStore.FilterOperator.NIN]: ({ key, value }, metadata)=>{ return !vectorStore.parseArrayValue(value).find((v)=>metadata[key] === v); }, [vectorStore.FilterOperator.ANY]: ({ key, value }, metadata)=>{ if (!Array.isArray(metadata[key])) return false; return vectorStore.parseArrayValue(value).some((v)=>metadata[key].includes(v)); }, [vectorStore.FilterOperator.ALL]: ({ key, value }, metadata)=>{ if (!Array.isArray(metadata[key])) return false; return vectorStore.parseArrayValue(value).every((v)=>metadata[key].includes(v)); }, [vectorStore.FilterOperator.TEXT_MATCH]: ({ key, value }, metadata)=>{ return metadata[key].includes(vectorStore.parsePrimitiveValue(value)); }, [vectorStore.FilterOperator.CONTAINS]: ({ key, value }, metadata)=>{ if (!Array.isArray(metadata[key])) return false; return !!vectorStore.parseArrayValue(metadata[key]).find((v)=>v === value); }, [vectorStore.FilterOperator.GT]: ({ key, value }, metadata)=>{ return metadata[key] > vectorStore.parsePrimitiveValue(value); }, [vectorStore.FilterOperator.LT]: ({ key, value }, metadata)=>{ return metadata[key] < vectorStore.parsePrimitiveValue(value); }, [vectorStore.FilterOperator.GTE]: ({ key, value }, metadata)=>{ return metadata[key] >= vectorStore.parsePrimitiveValue(value); }, [vectorStore.FilterOperator.LTE]: ({ key, value }, metadata)=>{ return metadata[key] <= vectorStore.parsePrimitiveValue(value); } }; // Build a filter function based on the metadata and the preFilters const buildFilterFn = (metadata, preFilters)=>{ if (!preFilters) return true; if (!metadata) return false; const { filters, condition } = preFilters; const queryCondition = condition || "and"; // default to and const itemFilterFn = (filter)=>{ if (filter.operator === vectorStore.FilterOperator.IS_EMPTY) { // for `is_empty` operator, return true if the metadata key is not present or the value is empty const value = metadata[filter.key]; return value === undefined || value === null || value === "" || Array.isArray(value) && value.length === 0; } if (metadata[filter.key] === undefined) { // for other operators, always return false if the metadata key is not present return false; } const metadataLookupFn = OPERATOR_TO_FILTER[filter.operator]; if (!metadataLookupFn) throw new Error(`Unsupported operator: ${filter.operator}`); return metadataLookupFn(filter, metadata); }; if (queryCondition === "and") return filters.every(itemFilterFn); return filters.some(itemFilterFn); }; class SimpleVectorStoreData { constructor(){ this.embeddingDict = {}; this.textIdToRefDocId = {}; this.metadataDict = {}; } } class SimpleVectorStore extends vectorStore.BaseVectorStore { constructor(init){ super(init), this.storesText = false; this.data = init?.data || new SimpleVectorStoreData(); } static async fromPersistDir(persistDir = global.DEFAULT_PERSIST_DIR, embedModel, options) { const persistPath = env.path.join(persistDir, "vector_store.json"); return await SimpleVectorStore.fromPersistPath(persistPath, embedModel, options); } client() { return null; } async get(textId) { return this.data.embeddingDict[textId]; } async add(embeddingResults) { for (const node of embeddingResults){ this.data.embeddingDict[node.id_] = node.getEmbedding(); if (!node.sourceNode) { continue; } this.data.textIdToRefDocId[node.id_] = node.sourceNode?.nodeId; // Add metadata to the metadataDict const metadata = vectorStore.nodeToMetadata(node, true, undefined, false); delete metadata["_node_content"]; this.data.metadataDict[node.id_] = metadata; } if (this.persistPath) { await this.persist(this.persistPath); } return embeddingResults.map((result)=>result.id_); } async delete(refDocId) { const textIdsToDelete = Object.keys(this.data.textIdToRefDocId).filter((textId)=>this.data.textIdToRefDocId[textId] === refDocId); for (const textId of textIdsToDelete){ delete this.data.embeddingDict[textId]; delete this.data.textIdToRefDocId[textId]; if (this.data.metadataDict) delete this.data.metadataDict[textId]; } if (this.persistPath) { await this.persist(this.persistPath); } return Promise.resolve(); } async filterNodes(query) { const items = Object.entries(this.data.embeddingDict); const queryFilterFn = (nodeId)=>{ const metadata = this.data.metadataDict[nodeId]; return buildFilterFn(metadata, query.filters); }; const nodeFilterFn = (nodeId)=>{ if (!query.docIds) return true; const availableIds = new Set(query.docIds); return availableIds.has(nodeId); }; const queriedItems = items.filter((item)=>nodeFilterFn(item[0]) && queryFilterFn(item[0])); const nodeIds = queriedItems.map((item)=>item[0]); const embeddings = queriedItems.map((item)=>item[1]); return { nodeIds, embeddings }; } async query(query) { const { nodeIds, embeddings: embeddings$1 } = await this.filterNodes(query); const queryEmbedding = query.queryEmbedding; let topSimilarities, topIds; if (LEARNER_MODES.has(query.mode)) { // fixme: unfinished throw new Error("Learner modes not implemented for SimpleVectorStore yet."); } else if (query.mode === MMR_MODE) { const mmrThreshold = query.mmrThreshold; [topSimilarities, topIds] = embeddings.getTopKMMREmbeddings(queryEmbedding, embeddings$1, null, query.similarityTopK, nodeIds, mmrThreshold); } else if (query.mode === vectorStore.VectorStoreQueryMode.DEFAULT) { [topSimilarities, topIds] = embeddings.getTopKEmbeddings(queryEmbedding, embeddings$1, query.similarityTopK, nodeIds); } else { throw new Error(`Invalid query mode: ${query.mode}`); } return Promise.resolve({ similarities: topSimilarities, ids: topIds }); } async persist(persistPath = env.path.join(global.DEFAULT_PERSIST_DIR, "vector_store.json")) { await SimpleVectorStore.persistData(persistPath, this.data); } static async persistData(persistPath, data) { const dirPath = env.path.dirname(persistPath); if (!await exists(dirPath)) { await env.fs.mkdir(dirPath); } await env.fs.writeFile(persistPath, JSON.stringify(data)); } static async fromPersistPath(persistPath, embedModel, options) { const logger = options?.logger ?? env.consoleLogger; const dirPath = env.path.dirname(persistPath); if (!await exists(dirPath)) { await env.fs.mkdir(dirPath, { recursive: true }); } let dataDict = {}; if (!await exists(persistPath)) { logger.log(`Starting new store from path: ${persistPath}`); } else { try { const fileData = await env.fs.readFile(persistPath); dataDict = JSON.parse(fileData.toString()); } catch (e) { throw new Error(`Failed to load data from path: ${persistPath}`, { cause: e }); } } const data = new SimpleVectorStoreData(); // @ts-expect-error TS2322 data.embeddingDict = dataDict.embeddingDict ?? {}; // @ts-expect-error TS2322 data.textIdToRefDocId = dataDict.textIdToRefDocId ?? {}; // @ts-expect-error TS2322 data.metadataDict = dataDict.metadataDict ?? {}; const store = new SimpleVectorStore({ data, embedModel }); store.persistPath = persistPath; return store; } static fromDict(saveDict, embedModel) { const data = new SimpleVectorStoreData(); data.embeddingDict = saveDict.embeddingDict; data.textIdToRefDocId = saveDict.textIdToRefDocId; data.metadataDict = saveDict.metadataDict; return new SimpleVectorStore({ data, embedModel }); } toDict() { return { embeddingDict: this.data.embeddingDict, textIdToRefDocId: this.data.textIdToRefDocId, metadataDict: this.data.metadataDict }; } } class SimpleDocumentStore extends docStore.KVDocumentStore { constructor(kvStore$1, namespace){ kvStore$1 = kvStore$1 || new kvStore.SimpleKVStore(); namespace = namespace || global.DEFAULT_NAMESPACE; super(kvStore$1, namespace); this.kvStore = kvStore$1; } static async fromPersistDir(persistDir = global.DEFAULT_PERSIST_DIR, namespace, options) { const persistPath = env.path.join(persistDir, global.DEFAULT_DOC_STORE_PERSIST_FILENAME); return await SimpleDocumentStore.fromPersistPath(persistPath, namespace, options); } static async fromPersistPath(persistPath, namespace, options) { const simpleKVStore = await kvStore.SimpleKVStore.fromPersistPath(persistPath, options); return new SimpleDocumentStore(simpleKVStore, namespace); } async persist(persistPath = env.path.join(global.DEFAULT_PERSIST_DIR, global.DEFAULT_DOC_STORE_PERSIST_FILENAME)) { if (___default.default.isObject(this.kvStore) && this.kvStore instanceof kvStore.BaseInMemoryKVStore) { await this.kvStore.persist(persistPath); } } static fromDict(saveDict, namespace) { const simpleKVStore = kvStore.SimpleKVStore.fromDict(saveDict); return new SimpleDocumentStore(simpleKVStore, namespace); } toDict() { if (___default.default.isObject(this.kvStore) && this.kvStore instanceof kvStore.SimpleKVStore) { return this.kvStore.toDict(); } // If the kvstore is not a SimpleKVStore, you might want to throw an error or return a default value. throw new Error("KVStore is not a SimpleKVStore"); } } async function storageContextFromDefaults({ docStore, indexStore: indexStore$1, vectorStore, vectorStores, persistDir }) { vectorStores = vectorStores ?? {}; if (!persistDir) { docStore = docStore ?? new SimpleDocumentStore(); indexStore$1 = indexStore$1 ?? new indexStore.SimpleIndexStore(); if (!(schema.ModalityType.TEXT in vectorStores)) { vectorStores[schema.ModalityType.TEXT] = vectorStore ?? new SimpleVectorStore(); } } else { const embedModel = Settings.embedModel; docStore = docStore || await SimpleDocumentStore.fromPersistDir(persistDir, global.DEFAULT_NAMESPACE); indexStore$1 = indexStore$1 || await indexStore.SimpleIndexStore.fromPersistDir(persistDir); if (!(schema.ObjectType.TEXT in vectorStores)) { vectorStores[schema.ModalityType.TEXT] = vectorStore ?? await SimpleVectorStore.fromPersistDir(persistDir, embedModel); } } return { docStore, indexStore: indexStore$1, vectorStores }; } const transformToJSON = (obj)=>{ // eslint-disable-next-line @typescript-eslint/no-explicit-any const seen = []; // eslint-disable-next-line @typescript-eslint/no-explicit-any const replacer = (key, value)=>{ if (value != null && typeof value == "object") { if (seen.indexOf(value) >= 0) { return; } seen.push(value); } return value; }; // this is a custom replacer function that will allow us to handle circular references const jsonStr = JSON.stringify(obj, replacer); return jsonStr; }; function getTransformationHash(nodes, transform) { const nodesStr = nodes.map((node)=>node.getContent(schema.MetadataMode.ALL)).join(""); const transformString = transformToJSON(transform); const hash = env.createSHA256(); hash.update(nodesStr + transformString + transform.id); return hash.digest(); } async function runTransformations(nodesToRun, transformations, // eslint-disable-next-line @typescript-eslint/no-explicit-any transformOptions = {}, { inPlace = true, cache, docStoreStrategy } = {}) { let nodes = nodesToRun; if (!inPlace) { nodes = [ ...nodesToRun ]; } if (docStoreStrategy) { nodes = await docStoreStrategy(nodes); } for (const transform of transformations){ if (cache) { const hash = getTransformationHash(nodes, transform); const cachedNodes = await cache.get(hash); if (cachedNodes) { nodes = cachedNodes; } else { nodes = await transform(nodes, transformOptions); await cache.put(hash, nodes); } } else { nodes = await transform(nodes, transformOptions); } } return nodes; } const DEFAULT_NAME = "query_engine_tool"; const DEFAULT_DESCRIPTION = "Useful for running a natural language query against a knowledge base and get back a natural language response."; const DEFAULT_PARAMETERS = { type: "object", properties: { query: { type: "string", description: "The query to search for" } }, required: [ "query" ] }; class QueryEngineTool { constructor({ queryEngine, metadata, includeSourceNodes }){ this.queryEngine = queryEngine; this.metadata = { name: metadata?.name ?? DEFAULT_NAME, description: metadata?.description ?? DEFAULT_DESCRIPTION, parameters: metadata?.parameters ?? DEFAULT_PARAMETERS }; this.includeSourceNodes = includeSourceNodes ?? false; } async call({ query }) { const response = await this.queryEngine.query({ query }); if (!this.includeSourceNodes) { return { content: response.message.content }; } return { content: response.message.content, sourceNodes: response.sourceNodes }; } } /** * Indexes are the data structure that we store our nodes and embeddings in so * they can be retrieved for our queries. */ class BaseIndex { constructor(init){ this.storageContext = init.storageContext; this.docStore = init.docStore; this.indexStore = init.indexStore; this.indexStruct = init.indexStruct; } /** * Returns a query tool by calling asQueryEngine. * Either options or retriever can be passed, but not both. * If options are provided, they are passed to generate a retriever. */ asQueryTool(params) { if (params.options) { params.retriever = this.asRetriever(params.options); } return new QueryEngineTool({ queryEngine: this.asQueryEngine(params), metadata: params?.metadata, includeSourceNodes: params?.includeSourceNodes ?? false }); } /** * Insert a document into the index. * @param document */ async insert(document) { const nodes = await runTransformations([ document ], [ Settings.nodeParser ]); await this.insertNodes(nodes); await this.docStore.setDocumentHash(document.id_, document.hash); } /** * Alias for asRetriever * @param options */ // eslint-disable-next-line @typescript-eslint/no-explicit-any retriever(options) { return this.asRetriever(options); } /** * Alias for asQueryEngine * @param options you can supply your own custom Retriever and ResponseSynthesizer */ queryEngine(options) { return this.asQueryEngine(options); } /** * Alias for asQueryTool * Either options or retriever can be passed, but not both. * If options are provided, they are passed to generate a retriever. */ queryTool(params) { return this.asQueryTool(params); } } const defaultFormatNodeBatchFn = (summaryNodes)=>{ return summaryNodes.map((node, idx)=>{ return ` Document ${idx + 1}: ${node.getContent(schema.MetadataMode.LLM)} `.trim(); }).join("\n\n"); }; const defaultParseChoiceSelectAnswerFn = (answer, numChoices, raiseErr = false)=>{ // split the line into the answer number and relevance score portions const lineTokens = answer.split("\n").map((line)=>{ const lineTokens = line.split(","); if (lineTokens.length !== 2) { if (raiseErr) { throw new Error(`Invalid answer line: ${line}. Answer line must be of the form: answer_num: <int>, answer_relevance: <float>`); } else { return null; } } return lineTokens; }).filter((lineTokens)=>!___default.default.isNil(lineTokens)); // parse the answer number and relevance score return lineTokens.reduce((parseResult, lineToken)=>{ try { const docNum = parseInt(lineToken[0].split(":")[1].trim()); const answerRelevance = parseFloat(lineToken[1].split(":")[1].trim()); if (docNum < 1 || docNum > numChoices) { if (raiseErr) { throw new Error(`Invalid answer number: ${docNum}. Answer number must be between 1 and ${numChoices}`); } } else { parseResult[docNum] = answerRelevance; } } catch (e) { if (raiseErr) { throw e; } } return parseResult; }, {}); }; /** * A SummaryIndex keeps nodes in a sequential order for use with summarization. */ class SummaryIndex extends BaseIndex { constructor(init){ super(init); } static async init(options) { const storageContext = options.storageContext ?? await storageContextFromDefaults({}); const { docStore, indexStore } = storageContext; // Setup IndexStruct from storage const indexStructs = await indexStore.getIndexStructs(); let indexStruct; if (options.indexStruct && indexStructs.length > 0) { throw new Error("Cannot initialize index with both indexStruct and indexStore"); } if (options.indexStruct) { indexStruct = options.indexStruct; } else if (indexStructs.length == 1) { indexStruct = indexStructs[0].type === dataStructs.IndexStructType.LIST ? indexStructs[0] : null; } else if (indexStructs.length > 1 && options.indexId) { indexStruct = await indexStore.getIndexStruct(options.indexId); } else { indexStruct = null; } // check indexStruct type if (indexStruct && indexStruct.type !== dataStructs.IndexStructType.LIST) { throw new Error("Attempting to initialize SummaryIndex with non-list indexStruct"); } if (indexStruct) { if (options.nodes) { throw new Error("Cannot initialize SummaryIndex with both nodes and indexStruct"); } } else { if (!options.nodes) { throw new Error("Cannot initialize SummaryIndex without nodes or indexStruct"); } indexStruct = await SummaryIndex.buildIndexFromNodes(options.nodes, storageContext.docStore); await indexStore.addIndexStruct(indexStruct); }