UNPKG

llamaindex

Version:

<p align="center"> <img height="100" width="100" alt="LlamaIndex logo" src="https://ts.llamaindex.ai/square.svg" /> </p> <h1 align="center">LlamaIndex.TS</h1> <h3 align="center"> Data framework for your LLM application. </h3>

224 lines (223 loc) 9.05 kB
import { IndexList, IndexStructType } from "@llamaindex/core/data-structs";
import { defaultChoiceSelectPrompt } from "@llamaindex/core/prompts";
import { getResponseSynthesizer } from "@llamaindex/core/response-synthesizers";
import { BaseRetriever } from "@llamaindex/core/retriever";
import { extractText } from "@llamaindex/core/utils";
import _ from "lodash";
import { Settings } from "../../Settings.js";
import { ContextChatEngine } from "../../engines/chat/index.js";
import { RetrieverQueryEngine } from "../../engines/query/index.js";
import { storageContextFromDefaults } from "../../storage/StorageContext.js";
import { BaseIndex } from "../BaseIndex.js";
import { defaultFormatNodeBatchFn, defaultParseChoiceSelectAnswerFn } from "./utils.js";

/**
 * Retriever modes accepted by `SummaryIndex.asRetriever`.
 *
 * - `DEFAULT` ("default"): return every node held by the index.
 * - `LLM` ("llm"): ask the LLM which nodes are relevant, batch by batch.
 */
export var SummaryRetrieverMode = /*#__PURE__*/ function(SummaryRetrieverMode) {
    SummaryRetrieverMode["DEFAULT"] = "default";
    // EMBEDDING = "embedding",
    SummaryRetrieverMode["LLM"] = "llm";
    return SummaryRetrieverMode;
}({});

/**
 * A SummaryIndex keeps nodes in a sequential order for use with summarization.
 */
export class SummaryIndex extends BaseIndex {
    constructor(init){
        super(init);
    }

    /**
     * Create a SummaryIndex either from an existing list index struct or from
     * a fresh set of nodes.
     *
     * The index struct is resolved in this order: `options.indexStruct`, the
     * single struct found in the index store (only if it is a LIST struct),
     * or the struct looked up by `options.indexId` when the store holds
     * several. If none is found, a new struct is built from `options.nodes`
     * and added to the index store.
     *
     * @param options - `storageContext`, `indexStruct`, `indexId` and `nodes`
     *   are read; all are optional, but either nodes or a resolvable index
     *   struct must be available.
     * @returns a promise resolving to the new SummaryIndex.
     * @throws {Error} if both `options.indexStruct` and stored index structs
     *   exist, if the resolved struct is not a LIST struct, if both nodes and
     *   an index struct are supplied, or if neither is available.
     */
    static async init(options) {
        const storageContext = options.storageContext ?? await storageContextFromDefaults({});
        const { docStore, indexStore } = storageContext;

        // Setup IndexStruct from storage
        const indexStructs = await indexStore.getIndexStructs();
        if (options.indexStruct && indexStructs.length > 0) {
            throw new Error("Cannot initialize index with both indexStruct and indexStore");
        }

        let indexStruct = null;
        if (options.indexStruct) {
            indexStruct = options.indexStruct;
        } else if (indexStructs.length === 1) {
            // A lone stored struct is only reused when it is a list struct.
            indexStruct = indexStructs[0].type === IndexStructType.LIST ? indexStructs[0] : null;
        } else if (indexStructs.length > 1 && options.indexId) {
            indexStruct = await indexStore.getIndexStruct(options.indexId);
        }

        // check indexStruct type
        if (indexStruct && indexStruct.type !== IndexStructType.LIST) {
            throw new Error("Attempting to initialize SummaryIndex with non-list indexStruct");
        }

        if (indexStruct) {
            if (options.nodes) {
                throw new Error("Cannot initialize SummaryIndex with both nodes and indexStruct");
            }
        } else {
            if (!options.nodes) {
                throw new Error("Cannot initialize SummaryIndex without nodes or indexStruct");
            }
            indexStruct = await SummaryIndex.buildIndexFromNodes(options.nodes, docStore);
            await indexStore.addIndexStruct(indexStruct);
        }

        return new SummaryIndex({
            storageContext,
            docStore,
            indexStore,
            indexStruct
        });
    }

    /**
     * Build a SummaryIndex from documents: store the documents and their
     * hashes in the doc store, split them into nodes with
     * `Settings.nodeParser`, then delegate to `SummaryIndex.init`.
     *
     * @param documents - documents to index.
     * @param args - optional; only `storageContext` is read. A default
     *   storage context is created when it is missing.
     * @returns a promise resolving to the new SummaryIndex.
     */
    static async fromDocuments(documents, args = {}) {
        const storageContext = args.storageContext ?? await storageContextFromDefaults({});
        const docStore = storageContext.docStore;

        await docStore.addDocuments(documents, true);
        for (const doc of documents){
            await docStore.setDocumentHash(doc.id_, doc.hash);
        }

        const nodes = await Settings.nodeParser.getNodesFromDocuments(documents);
        return await SummaryIndex.init({
            nodes,
            storageContext
        });
    }

    /**
     * Create a retriever over this index.
     *
     * @param options - optional; `mode` selects the retriever and defaults
     *   to "default".
     * @returns a SummaryIndexRetriever ("default") or a
     *   SummaryIndexLLMRetriever ("llm").
     * @throws {Error} for any other mode.
     */
    asRetriever(options) {
        const { mode = SummaryRetrieverMode.DEFAULT } = options ?? {};
        switch(mode){
            case SummaryRetrieverMode.DEFAULT:
                return new SummaryIndexRetriever(this);
            case SummaryRetrieverMode.LLM:
                return new SummaryIndexLLMRetriever(this);
            default:
                throw new Error(`Unknown retriever mode: ${mode}`);
        }
    }

    /**
     * Create a query engine over this index.
     *
     * @param options - optional; `retriever`, `responseSynthesizer` and
     *   `nodePostprocessors` are read. A default retriever and a "compact"
     *   response synthesizer are used when those are not supplied.
     * @returns a RetrieverQueryEngine.
     */
    asQueryEngine(options) {
        const retriever = options?.retriever || this.asRetriever();
        const responseSynthesizer = options?.responseSynthesizer || getResponseSynthesizer("compact");
        return new RetrieverQueryEngine(retriever, responseSynthesizer, options?.nodePostprocessors);
    }

    /**
     * Create a chat engine over this index.
     *
     * @param options - optional; `retriever` and `mode` are consumed here,
     *   every other property is forwarded to ContextChatEngine. When no
     *   retriever is given, one is created with `mode` (default "default").
     * @returns a ContextChatEngine.
     */
    asChatEngine(options) {
        const { retriever, mode, ...contextChatEngineOptions } = options ?? {};
        return new ContextChatEngine({
            retriever: retriever ?? this.asRetriever({
                mode: mode ?? SummaryRetrieverMode.DEFAULT
            }),
            ...contextChatEngineOptions
        });
    }

    /**
     * Add nodes to the doc store and register each one in an index struct.
     *
     * @param nodes - nodes to add.
     * @param docStore - doc store that receives the nodes.
     * @param indexStruct - optional struct to extend; a new IndexList is
     *   created when omitted.
     * @returns a promise resolving to the struct that now lists the nodes.
     */
    static async buildIndexFromNodes(nodes, docStore, indexStruct) {
        const targetStruct = indexStruct || new IndexList();
        await docStore.addDocuments(nodes, true);
        for (const node of nodes){
            targetStruct.addNode(node);
        }
        return targetStruct;
    }

    /**
     * Register nodes in this index's struct.
     *
     * NOTE(review): only the in-memory index struct is updated here; this
     * method writes nothing to the doc store or index store. Confirm whether
     * callers are expected to persist separately.
     *
     * @param nodes - nodes to register.
     */
    async insertNodes(nodes) {
        for (const node of nodes){
            this.indexStruct.addNode(node);
        }
    }

    /**
     * Remove every node that belongs to a reference document from the index.
     * Does nothing when the doc store has no info for `refDocId`.
     *
     * @param refDocId - id of the reference document.
     * @param deleteFromDocStore - when truthy, also delete the reference
     *   document from the doc store.
     */
    async deleteRefDoc(refDocId, deleteFromDocStore) {
        const refDocInfo = await this.docStore.getRefDocInfo(refDocId);
        if (!refDocInfo) {
            return;
        }
        // Nodes are dropped from the index only; the doc store is handled
        // below in a single deleteRefDoc call.
        await this.deleteNodes(refDocInfo.nodeIds, false);
        if (deleteFromDocStore) {
            await this.docStore.deleteRefDoc(refDocId, false);
        }
    }

    /**
     * Remove nodes from the index struct and persist the updated struct to
     * the index store.
     *
     * @param nodeIds - ids of the nodes to remove.
     * @param deleteFromDocStore - when truthy, also delete each node from
     *   the doc store.
     */
    async deleteNodes(nodeIds, deleteFromDocStore) {
        // Set lookup keeps the filter linear in the number of indexed nodes.
        const idsToRemove = new Set(nodeIds);
        this.indexStruct.nodes = this.indexStruct.nodes.filter((existingNodeId)=>!idsToRemove.has(existingNodeId));

        if (deleteFromDocStore) {
            for (const nodeId of nodeIds){
                await this.docStore.deleteDocument(nodeId, false);
            }
        }

        await this.storageContext.indexStore.addIndexStruct(this.indexStruct);
    }

    /**
     * Collect the reference-document info for every indexed node that has a
     * source node.
     *
     * @returns a promise resolving to an object keyed by source node id.
     *   Nodes without a source node, or whose source has no stored info,
     *   are skipped.
     */
    async getRefDocInfo() {
        const nodeDocIds = this.indexStruct.nodes;
        const nodes = await this.docStore.getNodes(nodeDocIds);
        const refDocInfoMap = {};
        for (const node of nodes){
            const refNode = node.sourceNode;
            if (_.isNil(refNode)) {
                continue;
            }
            const refDocInfo = await this.docStore.getRefDocInfo(refNode.nodeId);
            if (_.isNil(refDocInfo)) {
                continue;
            }
            refDocInfoMap[refNode.nodeId] = refDocInfo;
        }
        return refDocInfoMap;
    }
}

/**
 * Simple retriever for SummaryIndex that returns all nodes
 */
export class SummaryIndexRetriever extends BaseRetriever {
    index;

    /**
     * @param index - the SummaryIndex to read nodes from.
     */
    constructor(index){
        super();
        this.index = index;
    }

    /**
     * Return every node in the index with a fixed score of 1.
     *
     * @param queryBundle - unused; all nodes are returned regardless.
     * @returns a promise resolving to an array of `{ node, score }`.
     */
    async _retrieve(queryBundle) {
        const nodeIds = this.index.indexStruct.nodes;
        const nodes = await this.index.docStore.getNodes(nodeIds);
        return nodes.map((node)=>({
                node,
                score: 1
            }));
    }
}

/**
 * LLM retriever for SummaryIndex which lets you select the most relevant chunks.
 */
export class SummaryIndexLLMRetriever extends BaseRetriever {
    index;
    choiceSelectPrompt;
    choiceBatchSize;
    formatNodeBatchFn;
    parseChoiceSelectAnswerFn;

    /**
     * @param index - the SummaryIndex to read nodes from.
     * @param choiceSelectPrompt - prompt used to ask the LLM for relevant
     *   documents; defaults to `defaultChoiceSelectPrompt`.
     * @param choiceBatchSize - number of nodes sent to the LLM per call
     *   (default 10); must be greater than zero.
     * @param formatNodeBatchFn - turns a batch of nodes into prompt context;
     *   defaults to `defaultFormatNodeBatchFn`.
     * @param parseChoiceSelectAnswerFn - parses the LLM answer into a map
     *   from document number to relevance score; defaults to
     *   `defaultParseChoiceSelectAnswerFn`.
     * @throws {RangeError} if `choiceBatchSize` is not a positive number.
     */
    constructor(index, choiceSelectPrompt, choiceBatchSize = 10, formatNodeBatchFn, parseChoiceSelectAnswerFn){
        super();
        // A zero, negative or non-numeric batch size would make the batching
        // loop in _retrieve never advance past the end of the node list.
        if (!(choiceBatchSize > 0)) {
            throw new RangeError(`choiceBatchSize must be a positive number, got ${choiceBatchSize}`);
        }
        this.index = index;
        this.choiceSelectPrompt = choiceSelectPrompt || defaultChoiceSelectPrompt;
        this.choiceBatchSize = choiceBatchSize;
        this.formatNodeBatchFn = formatNodeBatchFn || defaultFormatNodeBatchFn;
        this.parseChoiceSelectAnswerFn = parseChoiceSelectAnswerFn || defaultParseChoiceSelectAnswerFn;
    }

    /**
     * Walk the index in batches of `choiceBatchSize`, ask `Settings.llm`
     * which documents of each batch are relevant to the query, and return
     * the selected nodes with the relevance score parsed for each.
     *
     * @param query - the query; its text is extracted with `extractText`.
     * @returns a promise resolving to an array of `{ node, score }` in
     *   index order.
     */
    async _retrieve(query) {
        const nodeIds = this.index.indexStruct.nodes;
        const results = [];

        for(let start = 0; start < nodeIds.length; start += this.choiceBatchSize){
            const nodeIdsBatch = nodeIds.slice(start, start + this.choiceBatchSize);
            const nodesBatch = await this.index.docStore.getNodes(nodeIdsBatch);

            const prompt = this.choiceSelectPrompt.format({
                context: this.formatNodeBatchFn(nodesBatch),
                query: extractText(query)
            });
            const llm = Settings.llm;
            const rawResponse = (await llm.complete({
                prompt
            })).text;

            // parseResult is a map from doc number to relevance score
            const parseResult = this.parseChoiceSelectAnswerFn(rawResponse, nodesBatch.length);

            // Document numbers are taken to be 1-based positions within the
            // batch that was formatted for the LLM, matching the `i + 1`
            // score lookup the previous code used. The same number is used
            // both to decide whether a node was chosen and to read its
            // score, so a score can never be attached to a different node.
            nodesBatch.forEach((node, position)=>{
                const docNumber = `${position + 1}`;
                if (docNumber in parseResult) {
                    results.push({
                        node,
                        score: _.get(parseResult, docNumber, 1)
                    });
                }
            });
        }

        return results;
    }
}