llamaindex
Version:
<p align="center"> <img height="100" width="100" alt="LlamaIndex logo" src="https://ts.llamaindex.ai/square.svg" /> </p> <h1 align="center">LlamaIndex.TS</h1> <h3 align="center"> Data framework for your LLM application. </h3>
438 lines (431 loc) • 17 kB
JavaScript
export * from '@llamaindex/core/storage/chat-store';
import { KVDocumentStore } from '@llamaindex/core/storage/doc-store';
export * from '@llamaindex/core/storage/doc-store';
import { SimpleIndexStore } from '@llamaindex/core/storage/index-store';
export * from '@llamaindex/core/storage/index-store';
import { SimpleKVStore, BaseInMemoryKVStore } from '@llamaindex/core/storage/kv-store';
export * from '@llamaindex/core/storage/kv-store';
import { DEFAULT_PERSIST_DIR, DEFAULT_DOC_STORE_PERSIST_FILENAME, DEFAULT_NAMESPACE, Settings as Settings$1 } from '@llamaindex/core/global';
import { path, fs, AsyncLocalStorage, consoleLogger } from '@llamaindex/env';
import _ from 'lodash';
import { ModalityType, ObjectType } from '@llamaindex/core/schema';
import { PromptHelper } from '@llamaindex/core/indices';
import { SentenceSplitter } from '@llamaindex/core/node-parser';
import { getTopKMMREmbeddings, getTopKEmbeddings } from '@llamaindex/core/embeddings';
import { VectorStoreQueryMode, FilterOperator, parsePrimitiveValue, parseArrayValue, BaseVectorStore, nodeToMetadata } from '@llamaindex/core/vector-store';
/**
 * Document store layered on a key-value store. When no store is supplied it
 * falls back to a fresh SimpleKVStore, and when no namespace is supplied it
 * falls back to DEFAULT_NAMESPACE.
 */
class SimpleDocumentStore extends KVDocumentStore {
  /**
   * @param kvStore Backing key-value store; a new SimpleKVStore is created when falsy.
   * @param namespace Namespace forwarded to the base class; DEFAULT_NAMESPACE when falsy.
   */
  constructor(kvStore, namespace) {
    const backingStore = kvStore || new SimpleKVStore();
    super(backingStore, namespace || DEFAULT_NAMESPACE);
    this.kvStore = backingStore;
  }

  /**
   * Loads a document store from the default doc-store file inside a directory.
   * @param persistDir Directory holding the persisted file.
   * @param namespace Namespace for the resulting store.
   * @param options Forwarded unchanged to fromPersistPath.
   * @returns A promise resolving to the loaded SimpleDocumentStore.
   */
  static async fromPersistDir(persistDir = DEFAULT_PERSIST_DIR, namespace, options) {
    const docStoreFile = path.join(persistDir, DEFAULT_DOC_STORE_PERSIST_FILENAME);
    const loaded = await SimpleDocumentStore.fromPersistPath(docStoreFile, namespace, options);
    return loaded;
  }

  /**
   * Loads a document store from an explicit file path.
   * @param persistPath Path handed to SimpleKVStore.fromPersistPath.
   * @param namespace Namespace for the resulting store.
   * @param options Forwarded unchanged to SimpleKVStore.fromPersistPath.
   * @returns A promise resolving to the loaded SimpleDocumentStore.
   */
  static async fromPersistPath(persistPath, namespace, options) {
    const loadedKVStore = await SimpleKVStore.fromPersistPath(persistPath, options);
    return new SimpleDocumentStore(loadedKVStore, namespace);
  }

  /**
   * Persists the backing store, but only when it is a BaseInMemoryKVStore;
   * for any other kind of store this is a no-op.
   * @param persistPath Destination path; defaults to the doc-store file in DEFAULT_PERSIST_DIR.
   */
  async persist(persistPath = path.join(DEFAULT_PERSIST_DIR, DEFAULT_DOC_STORE_PERSIST_FILENAME)) {
    const store = this.kvStore;
    const isInMemoryStore = _.isObject(store) && store instanceof BaseInMemoryKVStore;
    if (!isInMemoryStore) {
      return;
    }
    await store.persist(persistPath);
  }

  /**
   * Builds a document store from a dictionary accepted by SimpleKVStore.fromDict.
   * @param saveDict Serialized key-value data.
   * @param namespace Namespace for the resulting store.
   * @returns The new SimpleDocumentStore.
   */
  static fromDict(saveDict, namespace) {
    return new SimpleDocumentStore(SimpleKVStore.fromDict(saveDict), namespace);
  }

  /**
   * Serializes the backing store to a dictionary.
   * @returns Whatever the backing SimpleKVStore's toDict() returns.
   * @throws Error when the backing store is not a SimpleKVStore.
   */
  toDict() {
    const store = this.kvStore;
    const isSimpleStore = _.isObject(store) && store instanceof SimpleKVStore;
    if (!isSimpleStore) {
      // Only SimpleKVStore exposes the dictionary form used here.
      throw new Error("KVStore is not a SimpleKVStore");
    }
    return store.toDict();
  }
}
// FS utility helpers
/**
 * Reports whether a path can be accessed on the filesystem.
 * Plays the same role as Python's os.path.exists.
 * @param path The path to probe.
 * @returns A promise resolving to true when fs.access succeeds, false when it fails for any reason.
 */
async function exists(path) {
  let accessible = true;
  try {
    await fs.access(path);
  } catch {
    // Any failure to access the path is reported as "does not exist".
    accessible = false;
  }
  return accessible;
}
/**
 * Async generator that descends through a directory tree depth-first and
 * yields the path of every non-directory entry it finds.
 * Paths are built by appending "/<entry>" to the directory path.
 * @param dirPath The directory to start from.
 */
async function* walk(dirPath) {
  for (const name of await fs.readdir(dirPath)) {
    const childPath = `${dirPath}/${name}`;
    const isDirectory = (await fs.stat(childPath)).isDirectory();
    if (!isDirectory) {
      yield childPath;
      continue;
    }
    // Recurse so nested files are yielded before moving to the next sibling.
    yield* walk(childPath);
  }
}
/**
 * @internal
 * Settings facade for this package. Some settings (debug, llm, embedModel,
 * callbackManager, chunkSize) are forwarded to the core Settings object;
 * the others (prompt, promptHelper, nodeParser, chunkOverlap) are held here,
 * each with an AsyncLocalStorage that lets a `with*` call override the value
 * for the duration of a callback.
 */
class GlobalSettings {
  // Values held by this instance.
  #prompt = {};
  #promptHelper = null; // created on first read
  #nodeParser = null; // created on first read
  #chunkOverlap;

  // Scoped overrides installed by the matching with* methods.
  #promptHelperAsyncLocalStorage = new AsyncLocalStorage();
  #nodeParserAsyncLocalStorage = new AsyncLocalStorage();
  #chunkOverlapAsyncLocalStorage = new AsyncLocalStorage();
  #promptAsyncLocalStorage = new AsyncLocalStorage();

  // ---- Settings forwarded to the core Settings object ----

  get debug() {
    return Settings$1.debug;
  }

  get llm() {
    return Settings$1.llm;
  }

  set llm(llm) {
    Settings$1.llm = llm;
  }

  withLLM(llm, fn) {
    return Settings$1.withLLM(llm, fn);
  }

  get embedModel() {
    return Settings$1.embedModel;
  }

  set embedModel(embedModel) {
    Settings$1.embedModel = embedModel;
  }

  withEmbedModel(embedModel, fn) {
    return Settings$1.withEmbedModel(embedModel, fn);
  }

  get callbackManager() {
    return Settings$1.callbackManager;
  }

  set callbackManager(callbackManager) {
    Settings$1.callbackManager = callbackManager;
  }

  withCallbackManager(callbackManager, fn) {
    return Settings$1.withCallbackManager(callbackManager, fn);
  }

  get chunkSize() {
    return Settings$1.chunkSize;
  }

  set chunkSize(chunkSize) {
    Settings$1.chunkSize = chunkSize;
  }

  withChunkSize(chunkSize, fn) {
    return Settings$1.withChunkSize(chunkSize, fn);
  }

  // ---- Settings held by this instance ----

  /** Scoped prompt helper if one is active, otherwise the lazily created instance value. */
  get promptHelper() {
    if (this.#promptHelper === null) {
      this.#promptHelper = new PromptHelper();
    }
    const scoped = this.#promptHelperAsyncLocalStorage.getStore();
    return scoped ?? this.#promptHelper;
  }

  set promptHelper(promptHelper) {
    this.#promptHelper = promptHelper;
  }

  withPromptHelper(promptHelper, fn) {
    return this.#promptHelperAsyncLocalStorage.run(promptHelper, fn);
  }

  /**
   * Scoped node parser if one is active, otherwise the instance value. The
   * instance value is created on first read as a SentenceSplitter using the
   * chunkSize and chunkOverlap in effect at that moment.
   */
  get nodeParser() {
    if (this.#nodeParser === null) {
      const { chunkSize, chunkOverlap } = this;
      this.#nodeParser = new SentenceSplitter({ chunkSize, chunkOverlap });
    }
    const scoped = this.#nodeParserAsyncLocalStorage.getStore();
    return scoped ?? this.#nodeParser;
  }

  set nodeParser(nodeParser) {
    this.#nodeParser = nodeParser;
  }

  withNodeParser(nodeParser, fn) {
    return this.#nodeParserAsyncLocalStorage.run(nodeParser, fn);
  }

  get chunkOverlap() {
    const scoped = this.#chunkOverlapAsyncLocalStorage.getStore();
    return scoped ?? this.#chunkOverlap;
  }

  /** Non-number values are ignored and leave the stored overlap untouched. */
  set chunkOverlap(chunkOverlap) {
    if (typeof chunkOverlap !== "number") {
      return;
    }
    this.#chunkOverlap = chunkOverlap;
  }

  withChunkOverlap(chunkOverlap, fn) {
    return this.#chunkOverlapAsyncLocalStorage.run(chunkOverlap, fn);
  }

  get prompt() {
    const scoped = this.#promptAsyncLocalStorage.getStore();
    return scoped ?? this.#prompt;
  }

  set prompt(prompt) {
    this.#prompt = prompt;
  }

  withPrompt(prompt, fn) {
    return this.#promptAsyncLocalStorage.run(prompt, fn);
  }
}
// Shared GlobalSettings instance for this module; storageContextFromDefaults
// reads Settings.embedModel from it when loading a persisted vector store.
const Settings = new GlobalSettings();
// Query modes that SimpleVectorStore.query rejects with a "not implemented" error.
const LEARNER_MODES = new Set([
VectorStoreQueryMode.SVM,
VectorStoreQueryMode.LINEAR_REGRESSION,
VectorStoreQueryMode.LOGISTIC_REGRESSION
]);
// Query mode for which SimpleVectorStore.query calls getTopKMMREmbeddings.
const MMR_MODE = VectorStoreQueryMode.MMR;
// Mapping of filter operators to metadata filter functions.
// Each function receives the filter (only `key` and `value` are used) and a
// node's metadata object, and returns true when the node passes the filter.
// Callers are expected to have checked that metadata[key] is not undefined.
const OPERATOR_TO_FILTER = {
  [FilterOperator.EQ]: ({ key, value }, metadata) => {
    return metadata[key] === parsePrimitiveValue(value);
  },
  [FilterOperator.NE]: ({ key, value }, metadata) => {
    return metadata[key] !== parsePrimitiveValue(value);
  },
  [FilterOperator.IN]: ({ key, value }, metadata) => {
    // `some`, not `!!find(...)`: find returns the matched element itself, so a
    // falsy match such as 0 or "" would be coerced to "not found".
    return parseArrayValue(value).some((v) => metadata[key] === v);
  },
  [FilterOperator.NIN]: ({ key, value }, metadata) => {
    // Same reasoning as IN: negate a real boolean, not a possibly falsy element.
    return !parseArrayValue(value).some((v) => metadata[key] === v);
  },
  [FilterOperator.ANY]: ({ key, value }, metadata) => {
    if (!Array.isArray(metadata[key])) return false;
    return parseArrayValue(value).some((v) => metadata[key].includes(v));
  },
  [FilterOperator.ALL]: ({ key, value }, metadata) => {
    if (!Array.isArray(metadata[key])) return false;
    return parseArrayValue(value).every((v) => metadata[key].includes(v));
  },
  [FilterOperator.TEXT_MATCH]: ({ key, value }, metadata) => {
    // Validate the filter value first so an invalid value is still reported.
    const needle = parsePrimitiveValue(value);
    const haystack = metadata[key];
    // Only strings and arrays provide includes(); any other metadata value
    // (number, boolean, null, ...) cannot match and must not throw.
    if (typeof haystack !== "string" && !Array.isArray(haystack)) return false;
    return haystack.includes(needle);
  },
  [FilterOperator.CONTAINS]: ({ key, value }, metadata) => {
    if (!Array.isArray(metadata[key])) return false;
    // `some` so that a falsy element equal to `value` (0, "") counts as a match.
    return parseArrayValue(metadata[key]).some((v) => v === value);
  },
  [FilterOperator.GT]: ({ key, value }, metadata) => {
    return metadata[key] > parsePrimitiveValue(value);
  },
  [FilterOperator.LT]: ({ key, value }, metadata) => {
    return metadata[key] < parsePrimitiveValue(value);
  },
  [FilterOperator.GTE]: ({ key, value }, metadata) => {
    return metadata[key] >= parsePrimitiveValue(value);
  },
  [FilterOperator.LTE]: ({ key, value }, metadata) => {
    return metadata[key] <= parsePrimitiveValue(value);
  }
};
// Decide whether a node's metadata satisfies the given pre-filters.
// - No pre-filters: every node passes (true).
// - Pre-filters but no metadata: the node fails (false).
// - Otherwise the individual filters are combined with "and" (the default)
//   or, for any other condition value, with "or".
const buildFilterFn = (metadata, preFilters) => {
  if (!preFilters) return true;
  if (!metadata) return false;
  const { filters, condition } = preFilters;
  const combineWith = condition || "and"; // default to and

  const passes = (filter) => {
    const { key, operator } = filter;
    const current = metadata[key];
    if (operator === FilterOperator.IS_EMPTY) {
      // `is_empty` passes when the key is absent or holds null, "" or an empty array
      if (current === undefined || current === null || current === "") return true;
      return Array.isArray(current) && current.length === 0;
    }
    // every other operator fails outright when the key is absent
    if (current === undefined) return false;
    const predicate = OPERATOR_TO_FILTER[operator];
    if (!predicate) throw new Error(`Unsupported operator: ${operator}`);
    return predicate(filter, metadata);
  };

  return combineWith === "and" ? filters.every(passes) : filters.some(passes);
};
/**
 * Plain container for the three dictionaries a SimpleVectorStore keeps.
 * Every instance starts with its own empty objects.
 */
class SimpleVectorStoreData {
  // node id -> embedding
  embeddingDict = {};
  // node id -> id of the node's source node
  textIdToRefDocId = {};
  // node id -> metadata
  metadataDict = {};
}
/**
 * Vector store that keeps embeddings, node-to-source links and metadata in
 * plain in-memory dictionaries, with optional persistence to a JSON file.
 * When `persistPath` is set on the instance, add() and delete() write the
 * store back to that path after each change.
 */
class SimpleVectorStore extends BaseVectorStore {
  /**
   * @param init Optional init object, forwarded to BaseVectorStore. `init.data`
   *   supplies existing store data; a fresh SimpleVectorStoreData is used otherwise.
   */
  constructor(init) {
    super(init);
    this.storesText = false;
    this.data = init?.data || new SimpleVectorStoreData();
  }

  /**
   * Loads a store from "vector_store.json" inside the given directory.
   * @param persistDir Directory holding the persisted file.
   * @param embedModel Embedding model handed to the new store.
   * @param options Forwarded unchanged to fromPersistPath.
   * @returns A promise resolving to the loaded store.
   */
  static async fromPersistDir(persistDir = DEFAULT_PERSIST_DIR, embedModel, options) {
    const persistPath = path.join(persistDir, "vector_store.json");
    return await SimpleVectorStore.fromPersistPath(persistPath, embedModel, options);
  }

  /** This store has no underlying client; always returns null. */
  client() {
    return null;
  }

  /**
   * @param textId Node id to look up.
   * @returns The stored embedding for that id, or undefined when absent.
   */
  async get(textId) {
    return this.data.embeddingDict[textId];
  }

  /**
   * Stores the embedding of every given node. For nodes that have a source
   * node, also records the source node id and the node's metadata.
   * @param embeddingResults Nodes to add.
   * @returns The ids of the given nodes, in input order.
   */
  async add(embeddingResults) {
    for (const node of embeddingResults) {
      this.data.embeddingDict[node.id_] = node.getEmbedding();
      // Nodes without a source node get an embedding only. They have no
      // metadata entry, so buildFilterFn rejects them in filtered queries.
      if (!node.sourceNode) {
        continue;
      }
      this.data.textIdToRefDocId[node.id_] = node.sourceNode.nodeId;
      // Add metadata to the metadataDict, without the serialized node content.
      const metadata = nodeToMetadata(node, true, undefined, false);
      delete metadata["_node_content"];
      this.data.metadataDict[node.id_] = metadata;
    }
    if (this.persistPath) {
      await this.persist(this.persistPath);
    }
    return embeddingResults.map((result) => result.id_);
  }

  /**
   * Removes every node whose recorded source node id equals `refDocId`.
   * @param refDocId Source node id whose nodes should be removed.
   */
  async delete(refDocId) {
    const { embeddingDict, textIdToRefDocId, metadataDict } = this.data;
    const textIdsToDelete = Object.keys(textIdToRefDocId).filter(
      (textId) => textIdToRefDocId[textId] === refDocId
    );
    for (const textId of textIdsToDelete) {
      delete embeddingDict[textId];
      delete textIdToRefDocId[textId];
      // Data assigned from outside may lack a metadata dictionary.
      if (metadataDict) delete metadataDict[textId];
    }
    if (this.persistPath) {
      await this.persist(this.persistPath);
    }
  }

  /**
   * Selects the stored nodes allowed by `query.docIds` (when given) and by
   * `query.filters` (when given).
   * @param query Query carrying optional `docIds` and `filters`.
   * @returns Parallel arrays `nodeIds` and `embeddings` for the selected nodes.
   */
  async filterNodes(query) {
    // Build the id lookup once instead of once per stored node.
    const allowedIds = query.docIds ? new Set(query.docIds) : null;
    const isSelected = (nodeId) => {
      if (allowedIds && !allowedIds.has(nodeId)) return false;
      return buildFilterFn(this.data.metadataDict[nodeId], query.filters);
    };
    const nodeIds = [];
    const embeddings = [];
    for (const [nodeId, embedding] of Object.entries(this.data.embeddingDict)) {
      if (!isSelected(nodeId)) continue;
      nodeIds.push(nodeId);
      embeddings.push(embedding);
    }
    return {
      nodeIds,
      embeddings
    };
  }

  /**
   * Runs a top-k similarity query over the nodes selected by filterNodes.
   * @param query Query with `queryEmbedding`, `similarityTopK`, `mode` and,
   *   for MMR mode, `mmrThreshold`.
   * @returns An object with `similarities` and `ids`.
   * @throws Error for learner modes (not implemented) and for any mode other
   *   than MMR or DEFAULT.
   */
  async query(query) {
    const { nodeIds, embeddings } = await this.filterNodes(query);
    const queryEmbedding = query.queryEmbedding;
    let topSimilarities;
    let topIds;
    if (LEARNER_MODES.has(query.mode)) {
      // fixme: unfinished
      throw new Error("Learner modes not implemented for SimpleVectorStore yet.");
    } else if (query.mode === MMR_MODE) {
      const mmrThreshold = query.mmrThreshold;
      [topSimilarities, topIds] = getTopKMMREmbeddings(queryEmbedding, embeddings, null, query.similarityTopK, nodeIds, mmrThreshold);
    } else if (query.mode === VectorStoreQueryMode.DEFAULT) {
      [topSimilarities, topIds] = getTopKEmbeddings(queryEmbedding, embeddings, query.similarityTopK, nodeIds);
    } else {
      throw new Error(`Invalid query mode: ${query.mode}`);
    }
    return {
      similarities: topSimilarities,
      ids: topIds
    };
  }

  /**
   * Writes the store's data to disk as JSON.
   * @param persistPath Destination file; defaults to "vector_store.json" in DEFAULT_PERSIST_DIR.
   */
  async persist(persistPath = path.join(DEFAULT_PERSIST_DIR, "vector_store.json")) {
    await SimpleVectorStore.persistData(persistPath, this.data);
  }

  /**
   * Serializes `data` as JSON into `persistPath`, creating the parent
   * directory first when it does not exist.
   * @param persistPath Destination file.
   * @param data Store data to serialize.
   */
  static async persistData(persistPath, data) {
    const dirPath = path.dirname(persistPath);
    if (!(await exists(dirPath))) {
      // recursive, as in fromPersistPath, so nested persist directories work.
      await fs.mkdir(dirPath, {
        recursive: true
      });
    }
    await fs.writeFile(persistPath, JSON.stringify(data));
  }

  /**
   * Loads a store from a JSON file. When the file does not exist, an empty
   * store is returned and a message is logged. The returned store has
   * `persistPath` set to the given path.
   * @param persistPath File to load from.
   * @param embedModel Embedding model handed to the new store.
   * @param options Optional; `options.logger` replaces the default console logger.
   * @returns A promise resolving to the store.
   * @throws Error (with the underlying error as `cause`) when the file exists
   *   but cannot be read or parsed.
   */
  static async fromPersistPath(persistPath, embedModel, options) {
    const logger = options?.logger ?? consoleLogger;
    const dirPath = path.dirname(persistPath);
    if (!(await exists(dirPath))) {
      await fs.mkdir(dirPath, {
        recursive: true
      });
    }
    let dataDict = {};
    if (!(await exists(persistPath))) {
      logger.log(`Starting new store from path: ${persistPath}`);
    } else {
      try {
        const fileData = await fs.readFile(persistPath);
        dataDict = JSON.parse(fileData.toString());
      } catch (e) {
        throw new Error(`Failed to load data from path: ${persistPath}`, {
          cause: e
        });
      }
    }
    const data = new SimpleVectorStoreData();
    // Files may lack some dictionaries; fall back to empty ones.
    data.embeddingDict = dataDict.embeddingDict ?? {};
    data.textIdToRefDocId = dataDict.textIdToRefDocId ?? {};
    data.metadataDict = dataDict.metadataDict ?? {};
    const store = new SimpleVectorStore({
      data,
      embedModel
    });
    store.persistPath = persistPath;
    return store;
  }

  /**
   * Builds a store from a dictionary shaped like the output of toDict().
   * Missing dictionaries default to empty objects, as in fromPersistPath.
   * @param saveDict Object with optional `embeddingDict`, `textIdToRefDocId` and `metadataDict`.
   * @param embedModel Embedding model handed to the new store.
   * @returns The new store.
   */
  static fromDict(saveDict, embedModel) {
    const data = new SimpleVectorStoreData();
    data.embeddingDict = saveDict.embeddingDict ?? {};
    data.textIdToRefDocId = saveDict.textIdToRefDocId ?? {};
    data.metadataDict = saveDict.metadataDict ?? {};
    return new SimpleVectorStore({
      data,
      embedModel
    });
  }

  /**
   * @returns An object referencing (not copying) the store's three dictionaries.
   */
  toDict() {
    const { embeddingDict, textIdToRefDocId, metadataDict } = this.data;
    return {
      embeddingDict,
      textIdToRefDocId,
      metadataDict
    };
  }
}
/**
 * Assembles a storage context, filling in any store the caller did not supply.
 *
 * Without `persistDir`, missing stores are created empty in memory. With
 * `persistDir`, missing stores are loaded from that directory, and the text
 * vector store is given the embed model from Settings.
 *
 * A caller-supplied `vectorStores` object is used as-is: when it has no text
 * entry, one is added to that same object.
 *
 * @param params Optional; may be omitted entirely.
 * @param params.docStore Document store to use.
 * @param params.indexStore Index store to use.
 * @param params.vectorStore Store to use for the text modality when `vectorStores` has no text entry.
 * @param params.vectorStores Vector stores keyed by modality.
 * @param params.persistDir Directory to load missing stores from.
 * @returns A promise resolving to `{ docStore, indexStore, vectorStores }`.
 */
async function storageContextFromDefaults({ docStore, indexStore, vectorStore, vectorStores, persistDir } = {}) {
  const stores = vectorStores ?? {};
  let resolvedDocStore;
  let resolvedIndexStore;
  if (!persistDir) {
    resolvedDocStore = docStore ?? new SimpleDocumentStore();
    resolvedIndexStore = indexStore ?? new SimpleIndexStore();
    if (!(ModalityType.TEXT in stores)) {
      stores[ModalityType.TEXT] = vectorStore ?? new SimpleVectorStore();
    }
  } else {
    const embedModel = Settings.embedModel;
    resolvedDocStore = docStore || await SimpleDocumentStore.fromPersistDir(persistDir, DEFAULT_NAMESPACE);
    resolvedIndexStore = indexStore || await SimpleIndexStore.fromPersistDir(persistDir);
    // Check the same key that is assigned below (ModalityType, not ObjectType),
    // so the check cannot drift from the assignment.
    if (!(ModalityType.TEXT in stores)) {
      stores[ModalityType.TEXT] = vectorStore ?? await SimpleVectorStore.fromPersistDir(persistDir, embedModel);
    }
  }
  return {
    docStore: resolvedDocStore,
    indexStore: resolvedIndexStore,
    vectorStores: stores
  };
}
export { SimpleDocumentStore, exists, storageContextFromDefaults, walk };