llamaindex
Version:
<p align="center"> <img height="100" width="100" alt="LlamaIndex logo" src="https://ts.llamaindex.ai/square.svg" /> </p> <h1 align="center">LlamaIndex.TS</h1> <h3 align="center"> Data framework for your LLM application. </h3>
224 lines (223 loc) • 9.05 kB
JavaScript
import { IndexList, IndexStructType } from "@llamaindex/core/data-structs";
import { defaultChoiceSelectPrompt } from "@llamaindex/core/prompts";
import { getResponseSynthesizer } from "@llamaindex/core/response-synthesizers";
import { BaseRetriever } from "@llamaindex/core/retriever";
import { extractText } from "@llamaindex/core/utils";
import _ from "lodash";
import { Settings } from "../../Settings.js";
import { ContextChatEngine } from "../../engines/chat/index.js";
import { RetrieverQueryEngine } from "../../engines/query/index.js";
import { storageContextFromDefaults } from "../../storage/StorageContext.js";
import { BaseIndex } from "../BaseIndex.js";
import { defaultFormatNodeBatchFn, defaultParseChoiceSelectAnswerFn } from "./utils.js";
/**
 * Retrieval modes understood by `SummaryIndex.asRetriever`.
 *
 * - `DEFAULT` ("default"): return every node held by the index.
 * - `LLM` ("llm"): let the LLM choose the relevant nodes.
 *
 * An embedding-based mode is not enabled here.
 */
export var SummaryRetrieverMode = /*#__PURE__*/ {
  DEFAULT: "default",
  LLM: "llm"
};
/**
 * A SummaryIndex keeps nodes in a sequential order for use with summarization.
 *
 * The index struct holds the ordered list of node IDs; the nodes themselves
 * are read from and written to the doc store of the storage context.
 */ export class SummaryIndex extends BaseIndex {
  constructor(init) {
    super(init);
  }

  /**
   * Create a SummaryIndex from either an existing index struct or a list of nodes.
   *
   * The index struct is resolved in this order: `options.indexStruct`, the single
   * struct found in the index store (only if it is a LIST struct), or the struct
   * looked up by `options.indexId` when the store holds several. When none is
   * found, a new struct is built from `options.nodes` and saved to the index store.
   *
   * @param options - `storageContext`, `indexStruct`, `indexId` and `nodes` are read.
   * @returns A promise resolving to the initialized SummaryIndex.
   * @throws {Error} If both `indexStruct` and a non-empty index store are given,
   *   if the resolved struct is not a LIST struct, if nodes are given together
   *   with an existing struct, or if neither nodes nor a struct are available.
   */
  static async init(options) {
    const storageContext = options.storageContext ?? await storageContextFromDefaults({});
    const { docStore, indexStore } = storageContext;

    // Setup IndexStruct from storage
    const indexStructs = await indexStore.getIndexStructs();
    if (options.indexStruct && indexStructs.length > 0) {
      throw new Error("Cannot initialize index with both indexStruct and indexStore");
    }

    let indexStruct = null;
    if (options.indexStruct) {
      indexStruct = options.indexStruct;
    } else if (indexStructs.length === 1) {
      // A lone struct of another type is ignored rather than rejected.
      const [onlyStruct] = indexStructs;
      indexStruct = onlyStruct.type === IndexStructType.LIST ? onlyStruct : null;
    } else if (indexStructs.length > 1 && options.indexId) {
      indexStruct = await indexStore.getIndexStruct(options.indexId);
    }

    // check indexStruct type
    if (indexStruct && indexStruct.type !== IndexStructType.LIST) {
      throw new Error("Attempting to initialize SummaryIndex with non-list indexStruct");
    }

    if (indexStruct) {
      if (options.nodes) {
        throw new Error("Cannot initialize SummaryIndex with both nodes and indexStruct");
      }
    } else {
      if (!options.nodes) {
        throw new Error("Cannot initialize SummaryIndex without nodes or indexStruct");
      }
      indexStruct = await SummaryIndex.buildIndexFromNodes(options.nodes, docStore);
      await indexStore.addIndexStruct(indexStruct);
    }

    return new SummaryIndex({
      storageContext,
      docStore,
      indexStore,
      indexStruct
    });
  }

  /**
   * Build a SummaryIndex from documents.
   *
   * The documents are stored in the doc store together with their hashes, split
   * into nodes by `Settings.nodeParser`, and the nodes are indexed via `init`.
   *
   * @param documents - Documents to store and index.
   * @param args - Optional; only `storageContext` is read.
   * @returns A promise resolving to the new SummaryIndex.
   */
  static async fromDocuments(documents, args = {}) {
    const storageContext = args.storageContext ?? await storageContextFromDefaults({});
    const { docStore } = storageContext;

    // NOTE(review): the second argument is presumably "allowUpdate" - confirm against the doc store API.
    await docStore.addDocuments(documents, true);
    for (const doc of documents) {
      await docStore.setDocumentHash(doc.id_, doc.hash);
    }

    const nodes = await Settings.nodeParser.getNodesFromDocuments(documents);
    return SummaryIndex.init({
      nodes,
      storageContext
    });
  }

  /**
   * Create a retriever over this index.
   *
   * @param options - Optional; `mode` selects the retriever ("default" or "llm").
   * @returns A SummaryIndexRetriever or SummaryIndexLLMRetriever bound to this index.
   * @throws {Error} If `mode` is not a known retriever mode.
   */
  asRetriever(options) {
    const { mode = SummaryRetrieverMode.DEFAULT } = options ?? {};
    switch (mode) {
      case SummaryRetrieverMode.DEFAULT:
        return new SummaryIndexRetriever(this);
      case SummaryRetrieverMode.LLM:
        return new SummaryIndexLLMRetriever(this);
      default:
        throw new Error(`Unknown retriever mode: ${mode}`);
    }
  }

  /**
   * Create a query engine over this index.
   *
   * @param options - Optional `retriever`, `responseSynthesizer` and
   *   `nodePostprocessors`. A missing retriever falls back to the default
   *   retriever; a missing synthesizer falls back to the "compact" one.
   * @returns A RetrieverQueryEngine.
   */
  asQueryEngine(options) {
    const retriever = options?.retriever || this.asRetriever();
    const responseSynthesizer = options?.responseSynthesizer || getResponseSynthesizer("compact");
    return new RetrieverQueryEngine(retriever, responseSynthesizer, options?.nodePostprocessors);
  }

  /**
   * Create a context chat engine over this index.
   *
   * @param options - Optional `retriever` and `mode`; every other property is
   *   forwarded unchanged to the ContextChatEngine constructor.
   * @returns A ContextChatEngine.
   */
  asChatEngine(options) {
    const { retriever, mode, ...contextChatEngineOptions } = options ?? {};
    return new ContextChatEngine({
      retriever: retriever ?? this.asRetriever({
        mode: mode ?? SummaryRetrieverMode.DEFAULT
      }),
      ...contextChatEngineOptions
    });
  }

  /**
   * Store nodes in the doc store and append them to an index struct.
   *
   * @param nodes - Nodes to add.
   * @param docStore - Doc store that receives the nodes.
   * @param indexStruct - Optional struct to append to; a new IndexList is
   *   created when omitted.
   * @returns A promise resolving to the struct the nodes were appended to.
   */
  static async buildIndexFromNodes(nodes, docStore, indexStruct) {
    const targetStruct = indexStruct || new IndexList();
    await docStore.addDocuments(nodes, true);
    for (const node of nodes) {
      targetStruct.addNode(node);
    }
    return targetStruct;
  }

  /**
   * Insert nodes into this index.
   *
   * The nodes are written to the doc store and the updated index struct is
   * saved to the index store, so that the retrievers (which load nodes from the
   * doc store by ID) can find them and the change is persisted the same way
   * `deleteNodes` persists removals.
   *
   * @param nodes - Nodes to insert.
   */
  async insertNodes(nodes) {
    await SummaryIndex.buildIndexFromNodes(nodes, this.docStore, this.indexStruct);
    await this.storageContext.indexStore.addIndexStruct(this.indexStruct);
  }

  /**
   * Remove every node that belongs to a reference document from this index.
   *
   * Does nothing when the doc store has no info for `refDocId`.
   *
   * @param refDocId - ID of the reference document.
   * @param deleteFromDocStore - When truthy, the reference document is also
   *   deleted from the doc store.
   */
  async deleteRefDoc(refDocId, deleteFromDocStore) {
    const refDocInfo = await this.docStore.getRefDocInfo(refDocId);
    if (!refDocInfo) {
      return;
    }
    await this.deleteNodes(refDocInfo.nodeIds, false);
    if (deleteFromDocStore) {
      await this.docStore.deleteRefDoc(refDocId, false);
    }
  }

  /**
   * Remove nodes from the index struct and save the struct to the index store.
   *
   * @param nodeIds - IDs of the nodes to remove.
   * @param deleteFromDocStore - When truthy, the nodes are also deleted from
   *   the doc store.
   */
  async deleteNodes(nodeIds, deleteFromDocStore) {
    // Set lookup keeps the filter linear instead of scanning nodeIds per entry.
    const idsToRemove = new Set(nodeIds);
    this.indexStruct.nodes = this.indexStruct.nodes.filter(
      (existingNodeId) => !idsToRemove.has(existingNodeId)
    );
    if (deleteFromDocStore) {
      for (const nodeId of nodeIds) {
        await this.docStore.deleteDocument(nodeId, false);
      }
    }
    await this.storageContext.indexStore.addIndexStruct(this.indexStruct);
  }

  /**
   * Collect the ref-doc info of every source document referenced by the
   * indexed nodes.
   *
   * Nodes without a source node, and source nodes the doc store has no info
   * for, are skipped.
   *
   * @returns A promise resolving to an object mapping source node ID to its
   *   ref-doc info.
   */
  async getRefDocInfo() {
    const nodeDocIds = this.indexStruct.nodes;
    const nodes = await this.docStore.getNodes(nodeDocIds);
    const refDocInfoMap = {};
    for (const node of nodes) {
      const refNode = node.sourceNode;
      if (_.isNil(refNode)) {
        continue;
      }
      const refDocInfo = await this.docStore.getRefDocInfo(refNode.nodeId);
      if (_.isNil(refDocInfo)) {
        continue;
      }
      refDocInfoMap[refNode.nodeId] = refDocInfo;
    }
    return refDocInfoMap;
  }
}
/**
 * Retriever for SummaryIndex that performs no filtering: every node listed in
 * the index struct is loaded from the doc store and returned with a score of 1.
 */ export class SummaryIndexRetriever extends BaseRetriever {
  index;

  /**
   * @param index - The index whose `indexStruct` and `docStore` are read on retrieval.
   */
  constructor(index) {
    super();
    this.index = index;
  }

  /**
   * Return all nodes of the index; the query does not influence the result.
   *
   * @param queryBundle - Unused.
   * @returns A promise resolving to an array of `{ node, score: 1 }` entries.
   */
  async _retrieve(queryBundle) {
    const allNodeIds = this.index.indexStruct.nodes;
    const allNodes = await this.index.docStore.getNodes(allNodeIds);
    const withUniformScore = (node) => ({ node, score: 1 });
    return allNodes.map(withUniformScore);
  }
}
/**
 * LLM retriever for SummaryIndex which lets you select the most relevant chunks.
 *
 * The nodes of the index are shown to the LLM in batches. For each batch the
 * LLM answer is parsed into a map from document number to relevance score, and
 * the nodes whose document number appears in that map are returned.
 */ export class SummaryIndexLLMRetriever extends BaseRetriever {
  index;
  choiceSelectPrompt;
  choiceBatchSize;
  formatNodeBatchFn;
  parseChoiceSelectAnswerFn;

  /**
   * @param index - Index whose `indexStruct` and `docStore` are read on retrieval.
   * @param choiceSelectPrompt - Prompt exposing `format({ context, query })`;
   *   defaults to `defaultChoiceSelectPrompt`.
   * @param choiceBatchSize - Number of nodes shown to the LLM per call (default 10).
   * @param formatNodeBatchFn - Turns a batch of nodes into the prompt context;
   *   defaults to `defaultFormatNodeBatchFn`.
   * @param parseChoiceSelectAnswerFn - Called with the raw LLM text and the batch
   *   length; returns the document-number -> score map. Defaults to
   *   `defaultParseChoiceSelectAnswerFn`.
   * @throws {Error} If `choiceBatchSize` is not a positive number; such a value
   *   would keep the batching loop from ever advancing.
   */
  constructor(index, choiceSelectPrompt, choiceBatchSize = 10, formatNodeBatchFn, parseChoiceSelectAnswerFn) {
    super();
    if (!(choiceBatchSize > 0)) {
      throw new Error(`choiceBatchSize must be a positive number, got: ${choiceBatchSize}`);
    }
    this.index = index;
    this.choiceSelectPrompt = choiceSelectPrompt || defaultChoiceSelectPrompt;
    this.choiceBatchSize = choiceBatchSize;
    this.formatNodeBatchFn = formatNodeBatchFn || defaultFormatNodeBatchFn;
    this.parseChoiceSelectAnswerFn = parseChoiceSelectAnswerFn || defaultParseChoiceSelectAnswerFn;
  }

  /**
   * Ask the LLM, batch by batch, which nodes are relevant to the query.
   *
   * @param query - Query whose text is obtained with `extractText`.
   * @returns A promise resolving to an array of `{ node, score }` entries, in
   *   index order. A selected node whose score is `undefined` gets score 1.
   */
  async _retrieve(query) {
    const nodeIds = this.index.indexStruct.nodes;
    const queryText = extractText(query);
    const results = [];

    for (let start = 0; start < nodeIds.length; start += this.choiceBatchSize) {
      const nodeIdsBatch = nodeIds.slice(start, start + this.choiceBatchSize);
      const nodesBatch = await this.index.docStore.getNodes(nodeIdsBatch);

      const prompt = this.choiceSelectPrompt.format({
        context: this.formatNodeBatchFn(nodesBatch),
        query: queryText
      });
      const { text: rawResponse } = await Settings.llm.complete({ prompt });

      // parseResult is a map from doc number to relevance score
      const parseResult = this.parseChoiceSelectAnswerFn(rawResponse, nodesBatch.length);

      // Document numbers are treated as 1-based: document N is the N-th node of
      // the batch that was formatted for the LLM. The same number is used both to
      // select the node and to read its score, so the two can never drift apart.
      // NOTE(review): 1-based numbering mirrors the previous `i + 1` score lookup;
      // confirm against the default format/parse helpers in ./utils.js.
      nodesBatch.forEach((node, position) => {
        const docNumber = position + 1;
        if (!(docNumber in parseResult)) {
          return;
        }
        const parsedScore = parseResult[docNumber];
        results.push({
          node,
          score: parsedScore === undefined ? 1 : parsedScore
        });
      });
    }

    return results;
  }
}