llamaindex
Version:
<p align="center"> <img height="100" width="100" alt="LlamaIndex logo" src="https://ts.llamaindex.ai/square.svg" /> </p> <h1 align="center">LlamaIndex.TS</h1> <h3 align="center"> Data framework for your LLM application. </h3>
315 lines (314 loc) • 13.2 kB
JavaScript
import { ContextChatEngine } from "@llamaindex/core/chat-engine";
import { IndexDict, IndexStructType } from "@llamaindex/core/data-structs";
import { DEFAULT_SIMILARITY_TOP_K } from "@llamaindex/core/embeddings";
import { BaseRetriever } from "@llamaindex/core/retriever";
import { ImageNode, ModalityType, ObjectType, splitNodesByType } from "@llamaindex/core/schema";
import { extractText } from "@llamaindex/core/utils";
import { VectorStoreQueryMode } from "@llamaindex/core/vector-store";
import { Settings } from "../../Settings.js";
import { RetrieverQueryEngine } from "../../engines/query/RetrieverQueryEngine.js";
import { addNodesToVectorStores, runTransformations } from "../../ingestion/IngestionPipeline.js";
import { createDocStoreStrategy, DocStoreStrategy } from "../../ingestion/strategies/index.js";
import { storageContextFromDefaults } from "../../storage/StorageContext.js";
import { BaseIndex } from "../BaseIndex.js";
/**
 * The VectorStoreIndex, an index that stores the nodes only according to their vector embeddings.
 *
 * Embedded nodes are written to the configured vector stores (an object keyed
 * by node/modality type). Nodes that a vector store does not keep as text, as
 * well as index and image nodes, are additionally recorded in the index struct
 * and the doc store.
 */
export class VectorStoreIndex extends BaseIndex {
  indexStore;
  embedModel;
  vectorStores;

  /**
   * @param init - Initialization object, forwarded to BaseIndex. This class
   *   reads `indexStore`, `vectorStores` and `storageContext.vectorStores`.
   */
  constructor(init) {
    super(init);
    this.indexStore = init.indexStore;
    // Explicitly passed vector stores win over the ones of the storage context.
    this.vectorStores = init.vectorStores ?? init.storageContext.vectorStores;
    this.embedModel = Settings.embedModel;
  }

  /**
   * The async init function creates a new VectorStoreIndex.
   *
   * @param options - Reads `storageContext`, `nodes`, `indexStruct`, `indexId`,
   *   `vectorStores` and `logProgress`. A default storage context is created
   *   when none is given.
   * @returns The new index; when `options.nodes` is set the nodes have already
   *   been inserted.
   * @throws {Error} If neither nodes nor an index struct (passed in or found in
   *   the index store) are available.
   */
  static async init(options) {
    const storageContext =
      options.storageContext ?? (await storageContextFromDefaults({}));
    const { indexStore, docStore } = storageContext;
    const existingStruct = await VectorStoreIndex.setupIndexStructFromStorage(
      indexStore,
      options,
    );
    if (!options.nodes && !existingStruct) {
      throw new Error(
        "Cannot initialize VectorStoreIndex without nodes or indexStruct",
      );
    }
    const indexStruct = existingStruct ?? new IndexDict();
    const index = new this({
      storageContext,
      docStore,
      indexStruct,
      indexStore,
      vectorStores: options.vectorStores,
    });
    if (options.nodes) {
      // If nodes are passed in, then we need to update the index
      await index.buildIndexFromNodes(options.nodes, {
        logProgress: options.logProgress,
      });
    }
    return index;
  }

  /**
   * Resolves the index struct to use, either from `options` or from the index store.
   *
   * Resolution order: `options.indexStruct`; else the single struct in the
   * store; else, when the store holds several, the one named by
   * `options.indexId`.
   *
   * @param indexStore - Store queried through `getIndexStructs()` / `getIndexStruct(id)`.
   * @param options - Reads `indexStruct` and `indexId`.
   * @returns The resolved index struct, or `undefined` if none could be resolved.
   * @throws {Error} If `options.indexStruct` is given while the store is not
   *   empty, or if the resolved struct is not of type `SIMPLE_DICT`.
   */
  static async setupIndexStructFromStorage(indexStore, options) {
    const indexStructs = await indexStore.getIndexStructs();
    if (options.indexStruct && indexStructs.length > 0) {
      throw new Error(
        "Cannot initialize index with both indexStruct and indexStore",
      );
    }
    let indexStruct;
    if (options.indexStruct) {
      indexStruct = options.indexStruct;
    } else if (indexStructs.length === 1) {
      // A struct of the wrong type is rejected by the type check below.
      indexStruct = indexStructs[0];
    } else if (indexStructs.length > 1 && options.indexId) {
      indexStruct = await indexStore.getIndexStruct(options.indexId);
    }
    // Check indexStruct type
    if (indexStruct && indexStruct.type !== IndexStructType.SIMPLE_DICT) {
      throw new Error(
        "Attempting to initialize VectorStoreIndex with non-vector indexStruct",
      );
    }
    return indexStruct;
  }

  /**
   * Calculates the embeddings for the given nodes.
   *
   * Nodes are grouped with `splitNodesByType`; each group is passed to the
   * embed model of the vector store registered under that type, falling back
   * to the index's own embed model. The models' return values are discarded
   * and the input array is returned, so the models are expected to attach the
   * embeddings to the nodes in place.
   *
   * @param nodes - An array of BaseNode objects representing the nodes for which embeddings are to be calculated.
   * @param {Object} [options] - An optional object containing additional parameters.
   * @param {boolean} [options.logProgress] - A boolean indicating whether to log progress to the console (useful for debugging).
   * @returns The same `nodes` array that was passed in.
   */
  async getNodeEmbeddingResults(nodes, options) {
    const nodesByType = splitNodesByType(nodes);
    for (const type in nodesByType) {
      const typedNodes = nodesByType[type];
      const embedModel = this.vectorStores[type]?.embedModel ?? this.embedModel;
      if (embedModel && typedNodes) {
        await embedModel(typedNodes, {
          logProgress: options?.logProgress,
        });
      }
    }
    return nodes;
  }

  /**
   * Get embeddings for nodes and place them into the index.
   *
   * @param nodes - Nodes to insert.
   * @param [options] - Forwarded to `insertNodes`.
   * @returns A promise that resolves once the nodes are inserted.
   */
  async buildIndexFromNodes(nodes, options) {
    await this.insertNodes(nodes, options);
  }

  /**
   * High level API: split documents, get embeddings, and build index.
   *
   * The `args` object is not modified; defaults are applied to a copy that is
   * handed to `init`.
   *
   * @param documents - Documents to run through `Settings.nodeParser`.
   * @param args - Optional settings; reads `storageContext`, `vectorStores`,
   *   `docStoreStrategy` and `logProgress`, and forwards everything to `init`.
   * @returns The initialized index.
   * @throws Rethrows any error from `init` after rolling back the doc store
   *   strategy for the parsed nodes.
   */
  static async fromDocuments(documents, args = {}) {
    const storageContext =
      args.storageContext ?? (await storageContextFromDefaults({}));
    const vectorStores = args.vectorStores ?? storageContext.vectorStores;
    // set doc store strategy defaults to the same as for the IngestionPipeline
    const strategyName =
      args.docStoreStrategy ??
      (vectorStores
        ? DocStoreStrategy.UPSERTS
        : DocStoreStrategy.DUPLICATES_ONLY);
    const docStore = storageContext.docStore;
    if (args.logProgress) {
      console.log("Using node parser on documents...");
    }
    // use doc store strategy to avoid duplicates
    const docStoreStrategy = createDocStoreStrategy(
      strategyName,
      docStore,
      Object.values(vectorStores ?? {}),
    );
    const nodes = await runTransformations(
      documents,
      [Settings.nodeParser],
      {},
      { docStoreStrategy },
    );
    if (args.logProgress) {
      console.log("Finished parsing documents.");
    }
    try {
      return await this.init({
        ...args,
        storageContext,
        vectorStores,
        docStoreStrategy: strategyName,
        nodes,
      });
    } catch (error) {
      await docStoreStrategy.rollback(docStore, nodes);
      throw error;
    }
  }

  /**
   * Creates an index on top of already populated vector stores.
   *
   * @param vectorStores - Vector stores keyed by modality type.
   * @returns The initialized index.
   * @throws {Error} If the text vector store is missing or does not store text.
   */
  static async fromVectorStores(vectorStores) {
    if (!vectorStores[ModalityType.TEXT]?.storesText) {
      throw new Error(
        "Cannot initialize from a vector store that does not store text",
      );
    }
    const storageContext = await storageContextFromDefaults({
      vectorStores,
    });
    // The empty node list satisfies init's "nodes or indexStruct" check
    // without inserting anything.
    return await this.init({
      nodes: [],
      storageContext,
    });
  }

  /**
   * Creates an index from a single vector store, registered as the text store.
   *
   * @param vectorStore - The text vector store.
   * @returns The initialized index.
   */
  static async fromVectorStore(vectorStore) {
    return this.fromVectorStores({
      [ModalityType.TEXT]: vectorStore,
    });
  }

  /**
   * Creates a VectorIndexRetriever bound to this index.
   *
   * @param [options] - Retriever options, spread into the retriever's constructor argument.
   * @returns The retriever.
   */
  asRetriever(options) {
    return new VectorIndexRetriever({
      index: this,
      ...options,
    });
  }

  /**
   * Create a RetrieverQueryEngine.
   * similarityTopK is only used if no existing retriever is provided.
   *
   * @param [options] - Reads `retriever`, `responseSynthesizer`, `preFilters`,
   *   `nodePostprocessors` and `similarityTopK`. `preFilters` are passed to the
   *   default retriever as `filters`.
   * @returns The query engine.
   */
  asQueryEngine(options) {
    const {
      retriever,
      responseSynthesizer,
      preFilters,
      nodePostprocessors,
      similarityTopK,
    } = options ?? {};
    return new RetrieverQueryEngine(
      retriever ??
        this.asRetriever({
          similarityTopK,
          filters: preFilters,
        }),
      responseSynthesizer,
      nodePostprocessors,
    );
  }

  /**
   * Convert the index to a chat engine.
   * @param options The options for creating the chat engine
   * @returns A ContextChatEngine that uses the index's retriever to get context for each query
   */
  asChatEngine(options = {}) {
    const { retriever, similarityTopK, preFilters, ...contextChatEngineOptions } =
      options;
    return new ContextChatEngine({
      retriever:
        retriever ??
        this.asRetriever({
          similarityTopK,
          filters: preFilters,
        }),
      ...contextChatEngineOptions,
    });
  }

  /**
   * Records nodes in the index struct and the doc store where required.
   * Passed to `addNodesToVectorStores` as a callback by `insertNodes`.
   *
   * @param newIds - Ids under which the nodes are registered; `newIds[i]` belongs to `nodes[i]`.
   * @param nodes - The nodes that were added.
   * @param vectorStore - The vector store the nodes were added to.
   */
  async insertNodesToStore(newIds, nodes, vectorStore) {
    // NOTE: if the vector store doesn't store text,
    // we need to add the nodes to the index struct and document store
    // NOTE: if the vector store keeps text,
    // we only need to add image and index nodes
    for (let i = 0; i < nodes.length; ++i) {
      const { type } = nodes[i];
      if (
        !vectorStore.storesText ||
        type === ObjectType.INDEX ||
        type === ObjectType.IMAGE
      ) {
        // Store a copy without the embedding.
        const nodeWithoutEmbedding = nodes[i].clone();
        nodeWithoutEmbedding.embedding = undefined;
        this.indexStruct.addNode(nodeWithoutEmbedding, newIds[i]);
        await this.docStore.addDocuments([nodeWithoutEmbedding], true);
      }
    }
  }

  /**
   * Embeds the nodes, adds them to the vector stores and persists the index struct.
   * Does nothing for a missing or empty node list.
   *
   * @param nodes - Nodes to insert.
   * @param [options] - Forwarded to `getNodeEmbeddingResults` (e.g. `logProgress`).
   */
  async insertNodes(nodes, options) {
    if (!nodes || nodes.length === 0) {
      return;
    }
    const embeddedNodes = await this.getNodeEmbeddingResults(nodes, options);
    await addNodesToVectorStores(
      embeddedNodes,
      this.vectorStores,
      this.insertNodesToStore.bind(this),
    );
    await this.indexStore.addIndexStruct(this.indexStruct);
  }

  /**
   * Deletes a reference document from every vector store and, optionally, the doc store.
   *
   * @param refDocId - Id of the reference document.
   * @param [deleteFromDocStore=true] - Whether to also delete it from the doc store.
   */
  async deleteRefDoc(refDocId, deleteFromDocStore = true) {
    // Vector stores first: deleteRefDocFromStore reads the ref doc info from
    // the doc store, so the doc store entry has to be removed last.
    for (const vectorStore of Object.values(this.vectorStores)) {
      await this.deleteRefDocFromStore(vectorStore, refDocId);
    }
    if (deleteFromDocStore) {
      await this.docStore.deleteDocument(refDocId, false);
    }
  }

  /**
   * Deletes a reference document from one vector store.
   *
   * For stores that do not keep text, the document's nodes (as listed by the
   * doc store's ref doc info) are also removed from the index struct and the
   * vector store, and the index struct is persisted again.
   *
   * @param vectorStore - The vector store to delete from.
   * @param refDocId - Id of the reference document.
   */
  async deleteRefDocFromStore(vectorStore, refDocId) {
    await vectorStore.delete(refDocId);
    if (!vectorStore.storesText) {
      const refDocInfo = await this.docStore.getRefDocInfo(refDocId);
      if (refDocInfo) {
        for (const nodeId of refDocInfo.nodeIds) {
          this.indexStruct.delete(nodeId);
          await vectorStore.delete(nodeId);
        }
      }
      await this.indexStore.addIndexStruct(this.indexStruct);
    }
  }
}
/**
 * Retriever that queries the vector stores of a VectorStoreIndex.
 *
 * Every vector store registered on the index is queried; the per-store result
 * limit is taken from `topK`, which is keyed by the same type as the stores.
 */
export class VectorIndexRetriever extends BaseRetriever {
  index;
  topK;
  filters;
  queryMode;

  /**
   * @param options - Reads `index` (required), `mode`, `topK`,
   *   `similarityTopK` and `filters`. A truthy `topK` is used as-is; otherwise
   *   `similarityTopK` (or the default) is used for text and the default for
   *   images.
   */
  constructor(options) {
    super();
    this.index = options.index;
    this.queryMode = options.mode ?? VectorStoreQueryMode.DEFAULT;
    if ("topK" in options && options.topK) {
      this.topK = options.topK;
    } else {
      const textTopK =
        "similarityTopK" in options && options.similarityTopK
          ? options.similarityTopK
          : DEFAULT_SIMILARITY_TOP_K;
      this.topK = {
        [ModalityType.TEXT]: textTopK,
        [ModalityType.IMAGE]: DEFAULT_SIMILARITY_TOP_K,
      };
    }
    this.filters = options.filters;
  }

  /**
   * @deprecated, pass similarityTopK or topK in constructor instead or directly modify topK
   */
  set similarityTopK(similarityTopK) {
    this.topK[ModalityType.TEXT] = similarityTopK;
  }

  /**
   * Queries every vector store of the index, one after the other, and
   * concatenates the results in store iteration order.
   *
   * @param params - Reads `query`.
   * @returns The combined list of `{ node, score }` entries.
   */
  async _retrieve(params) {
    const { query } = params;
    const vectorStores = this.index.vectorStores;
    let nodesWithScores = [];
    for (const type in vectorStores) {
      const storeResults = await this.retrieveQuery(
        query,
        type,
        vectorStores[type],
      );
      nodesWithScores = nodesWithScores.concat(storeResults);
    }
    return nodesWithScores;
  }

  /**
   * Runs the query against a single vector store.
   *
   * @param query - Either a plain string or an array of content items.
   * @param type - Key used to look up the result limit in `topK`.
   * @param vectorStore - The vector store to query.
   * @param [filters] - Filters used only when the retriever itself has none.
   * @returns The `{ node, score }` entries for all content items of the query.
   */
  async retrieveQuery(query, type, vectorStore, filters) {
    // convert string message to multi-modal format
    const isPlainText = typeof query === "string";
    const queryStr = isPlainText ? query : extractText(query);
    const queryItems = isPlainText ? [{ type: "text", text: query }] : query;
    // Use the vector store's own embed model when it has one, otherwise the
    // index's. This is the same precedence that is applied when the nodes are
    // embedded for insertion, so stored and query embeddings come from the
    // same model.
    const embedModel = vectorStore.embedModel ?? this.index.embedModel;
    let nodes = [];
    // query each content item (e.g. text or image) separately
    for (const item of queryItems) {
      const queryEmbedding = await embedModel.getQueryEmbedding(item);
      if (queryEmbedding) {
        const result = await vectorStore.query({
          queryStr,
          queryEmbedding,
          mode: this.queryMode ?? VectorStoreQueryMode.DEFAULT,
          similarityTopK: this.topK[type],
          // Filters configured on the retriever take precedence over the argument.
          filters: this.filters ?? filters,
        });
        nodes = nodes.concat(this.buildNodeListFromQueryResult(result));
      }
    }
    return nodes;
  }

  /**
   * Converts a vector store query result into `{ node, score }` entries.
   *
   * Nodes returned by the store that are not yet known to the index struct
   * are added to its `nodesDict`; every node is then read back from there.
   *
   * @param result - Query result; reads `ids`, `similarities` and the optional `nodes`.
   * @returns One entry per id, in result order.
   */
  buildNodeListFromQueryResult(result) {
    const nodesWithScores = [];
    for (let i = 0; i < result.ids.length; i++) {
      const id = result.ids[i];
      const nodesDict = this.index.indexStruct.nodesDict;
      const nodeFromResult = result.nodes?.[i];
      if (!nodesDict[id] && nodeFromResult) {
        nodesDict[id] = nodeFromResult;
      }
      // NOTE(review): stays undefined when the id is neither in nodesDict nor
      // in result.nodes; the entry is still pushed, as before.
      const node = nodesDict[id];
      // XXX: Hack, if it's an image node, we reconstruct the image from the URL
      // Alternative: Store image in doc store and retrieve it here
      if (node instanceof ImageNode) {
        node.image = node.getUrl();
      }
      nodesWithScores.push({
        node,
        score: result.similarities[i],
      });
    }
    return nodesWithScores;
  }
}