llamaindex
Version:
<p align="center"> <img height="100" width="100" alt="LlamaIndex logo" src="https://ts.llamaindex.ai/square.svg" /> </p> <h1 align="center">LlamaIndex.TS</h1> <h3 align="center"> Data framework for your LLM application. </h3>
468 lines (458 loc) • 18.6 kB
JavaScript
Object.defineProperty(exports, '__esModule', { value: true });
var chatStore = require('@llamaindex/core/storage/chat-store');
var docStore = require('@llamaindex/core/storage/doc-store');
var indexStore = require('@llamaindex/core/storage/index-store');
var kvStore = require('@llamaindex/core/storage/kv-store');
var global = require('@llamaindex/core/global');
var env = require('@llamaindex/env');
var _ = require('lodash');
var schema = require('@llamaindex/core/schema');
var indices = require('@llamaindex/core/indices');
var nodeParser = require('@llamaindex/core/node-parser');
var embeddings = require('@llamaindex/core/embeddings');
var vectorStore = require('@llamaindex/core/vector-store');
/**
 * Normalizes a required module for default-import style access.
 * @param e The value returned by `require`.
 * @returns `e` itself when it is truthy and flagged `__esModule`; otherwise a
 * wrapper object whose `default` property is `e`.
 */
function _interopDefault(e) {
    if (e && e.__esModule) {
        return e;
    }
    return { default: e };
}
var ___default = /*#__PURE__*/_interopDefault(_);
/**
 * Document store layered on docStore.KVDocumentStore that keeps a reference to
 * its key-value store so it can be persisted and serialized.
 */
class SimpleDocumentStore extends docStore.KVDocumentStore {
    /**
     * @param kvStore$1 Key-value store to use; a new kvStore.SimpleKVStore is created when falsy.
     * @param namespace Namespace passed to the base class; global.DEFAULT_NAMESPACE when falsy.
     */
    constructor(kvStore$1, namespace){
        const backingStore = kvStore$1 || new kvStore.SimpleKVStore();
        const effectiveNamespace = namespace || global.DEFAULT_NAMESPACE;
        super(backingStore, effectiveNamespace);
        this.kvStore = backingStore;
    }
    /**
     * Loads a store from the default doc-store file name inside `persistDir`.
     * @param persistDir Directory containing the persisted file.
     * @param namespace Namespace forwarded to the constructor.
     * @param options Forwarded to kvStore.SimpleKVStore.fromPersistPath.
     */
    static async fromPersistDir(persistDir = global.DEFAULT_PERSIST_DIR, namespace, options) {
        const fileName = global.DEFAULT_DOC_STORE_PERSIST_FILENAME;
        return await SimpleDocumentStore.fromPersistPath(env.path.join(persistDir, fileName), namespace, options);
    }
    /**
     * Loads a store from an explicit file path.
     * @param persistPath Path handed to kvStore.SimpleKVStore.fromPersistPath.
     * @param namespace Namespace forwarded to the constructor.
     * @param options Forwarded to kvStore.SimpleKVStore.fromPersistPath.
     */
    static async fromPersistPath(persistPath, namespace, options) {
        const loaded = await kvStore.SimpleKVStore.fromPersistPath(persistPath, options);
        return new SimpleDocumentStore(loaded, namespace);
    }
    /**
     * Persists the underlying key-value store when it is a
     * kvStore.BaseInMemoryKVStore; otherwise does nothing.
     * @param persistPath Destination path; defaults to the default doc-store file in the default persist dir.
     */
    async persist(persistPath = env.path.join(global.DEFAULT_PERSIST_DIR, global.DEFAULT_DOC_STORE_PERSIST_FILENAME)) {
        const store = this.kvStore;
        const persistable = ___default.default.isObject(store) && store instanceof kvStore.BaseInMemoryKVStore;
        if (!persistable) {
            return;
        }
        await store.persist(persistPath);
    }
    /**
     * Builds a store from a dictionary via kvStore.SimpleKVStore.fromDict.
     * @param saveDict Dictionary handed to kvStore.SimpleKVStore.fromDict.
     * @param namespace Namespace forwarded to the constructor.
     */
    static fromDict(saveDict, namespace) {
        return new SimpleDocumentStore(kvStore.SimpleKVStore.fromDict(saveDict), namespace);
    }
    /**
     * Serializes the underlying key-value store.
     * @returns The result of the key-value store's own `toDict()`.
     * @throws Error when the key-value store is not a kvStore.SimpleKVStore.
     */
    toDict() {
        const store = this.kvStore;
        const serializable = ___default.default.isObject(store) && store instanceof kvStore.SimpleKVStore;
        if (!serializable) {
            // Only SimpleKVStore is handled here, so any other store type is rejected.
            throw new Error("KVStore is not a SimpleKVStore");
        }
        return store.toDict();
    }
}
// FS utility helpers
/**
 * Reports whether a path is accessible.
 * Analogous to the os.path.exists function from Python.
 * @param path The path to check.
 * @returns A promise resolving to true when `env.fs.access` succeeds and to
 * false when it throws or rejects for any reason.
 */
async function exists(path) {
    let accessible = true;
    try {
        await env.fs.access(path);
    } catch {
        // Any failure from access() is treated as "does not exist".
        accessible = false;
    }
    return accessible;
}
/**
 * Depth-first traversal of a directory that yields the path of every
 * non-directory entry beneath it.
 * Paths are built by joining the parent path and entry name with "/".
 * @param dirPath The path to the directory to traverse.
 */
async function* walk(dirPath) {
    for (const name of await env.fs.readdir(dirPath)) {
        const candidate = `${dirPath}/${name}`;
        const info = await env.fs.stat(candidate);
        if (!info.isDirectory()) {
            yield candidate;
            continue;
        }
        // Descend into the sub-directory before moving on to the next sibling.
        yield* walk(candidate);
    }
}
/**
 * Settings facade. Some values are delegated to `global.Settings` (llm,
 * embedModel, callbackManager, chunkSize, debug); the others (promptHelper,
 * nodeParser, chunkOverlap, prompt) are held here, each with an
 * AsyncLocalStorage that can override the stored value for the duration of a
 * `with*` callback.
 * @internal
 */
class GlobalSettings {
    // Locally held base values.
    #prompt = {};
    #promptHelper = null; // created on first read of `promptHelper`
    #nodeParser = null; // created on first read of `nodeParser`
    #chunkOverlap;
    // Scoped overrides; a non-nullish store value wins over the base value.
    #promptHelperAsyncLocalStorage = new env.AsyncLocalStorage();
    #nodeParserAsyncLocalStorage = new env.AsyncLocalStorage();
    #chunkOverlapAsyncLocalStorage = new env.AsyncLocalStorage();
    #promptAsyncLocalStorage = new env.AsyncLocalStorage();

    // ---- values delegated to global.Settings ----
    get debug() {
        return global.Settings.debug;
    }
    get llm() {
        return global.Settings.llm;
    }
    set llm(llm) {
        global.Settings.llm = llm;
    }
    withLLM(llm, fn) {
        return global.Settings.withLLM(llm, fn);
    }
    get embedModel() {
        return global.Settings.embedModel;
    }
    set embedModel(embedModel) {
        global.Settings.embedModel = embedModel;
    }
    withEmbedModel(embedModel, fn) {
        return global.Settings.withEmbedModel(embedModel, fn);
    }
    get callbackManager() {
        return global.Settings.callbackManager;
    }
    set callbackManager(callbackManager) {
        global.Settings.callbackManager = callbackManager;
    }
    withCallbackManager(callbackManager, fn) {
        return global.Settings.withCallbackManager(callbackManager, fn);
    }
    get chunkSize() {
        return global.Settings.chunkSize;
    }
    set chunkSize(chunkSize) {
        global.Settings.chunkSize = chunkSize;
    }
    withChunkSize(chunkSize, fn) {
        return global.Settings.withChunkSize(chunkSize, fn);
    }

    // ---- values held locally ----
    get promptHelper() {
        // The default is materialized before the scoped lookup, even if an override is active.
        if (this.#promptHelper === null) {
            this.#promptHelper = new indices.PromptHelper();
        }
        const scoped = this.#promptHelperAsyncLocalStorage.getStore();
        return scoped ?? this.#promptHelper;
    }
    set promptHelper(promptHelper) {
        this.#promptHelper = promptHelper;
    }
    withPromptHelper(promptHelper, fn) {
        return this.#promptHelperAsyncLocalStorage.run(promptHelper, fn);
    }
    get nodeParser() {
        // The default splitter captures chunkSize/chunkOverlap as they are at first read.
        if (this.#nodeParser === null) {
            const chunkSize = this.chunkSize;
            const chunkOverlap = this.chunkOverlap;
            this.#nodeParser = new nodeParser.SentenceSplitter({
                chunkSize,
                chunkOverlap
            });
        }
        const scoped = this.#nodeParserAsyncLocalStorage.getStore();
        return scoped ?? this.#nodeParser;
    }
    set nodeParser(nodeParser) {
        this.#nodeParser = nodeParser;
    }
    withNodeParser(nodeParser, fn) {
        return this.#nodeParserAsyncLocalStorage.run(nodeParser, fn);
    }
    get chunkOverlap() {
        const scoped = this.#chunkOverlapAsyncLocalStorage.getStore();
        return scoped ?? this.#chunkOverlap;
    }
    set chunkOverlap(chunkOverlap) {
        // Non-number assignments are ignored.
        if (typeof chunkOverlap !== "number") {
            return;
        }
        this.#chunkOverlap = chunkOverlap;
    }
    withChunkOverlap(chunkOverlap, fn) {
        return this.#chunkOverlapAsyncLocalStorage.run(chunkOverlap, fn);
    }
    get prompt() {
        const scoped = this.#promptAsyncLocalStorage.getStore();
        return scoped ?? this.#prompt;
    }
    set prompt(prompt) {
        this.#prompt = prompt;
    }
    withPrompt(prompt, fn) {
        return this.#promptAsyncLocalStorage.run(prompt, fn);
    }
}
// Module-level GlobalSettings instance; storageContextFromDefaults reads its
// embedModel when loading a persisted vector store.
const Settings = new GlobalSettings();
// Query modes that SimpleVectorStore.query rejects with a "not implemented" error.
const LEARNER_MODES = new Set([
vectorStore.VectorStoreQueryMode.SVM,
vectorStore.VectorStoreQueryMode.LINEAR_REGRESSION,
vectorStore.VectorStoreQueryMode.LOGISTIC_REGRESSION
]);
// Query mode that SimpleVectorStore.query routes to embeddings.getTopKMMREmbeddings.
const MMR_MODE = vectorStore.VectorStoreQueryMode.MMR;
// Mapping of filter operators to metadata filter functions.
// Each predicate receives the filter ({ key, value }) and a node's metadata
// object and returns whether the metadata satisfies the filter. Filter values
// are validated with vectorStore.parsePrimitiveValue / parseArrayValue before use.
const OPERATOR_TO_FILTER = {
    [vectorStore.FilterOperator.EQ]: ({ key, value }, metadata)=>{
        return metadata[key] === vectorStore.parsePrimitiveValue(value);
    },
    [vectorStore.FilterOperator.NE]: ({ key, value }, metadata)=>{
        return metadata[key] !== vectorStore.parsePrimitiveValue(value);
    },
    [vectorStore.FilterOperator.IN]: ({ key, value }, metadata)=>{
        // `some` instead of `!!find(...)`: find returns the matched element, so a
        // match on a falsy element (0 or "") would be coerced to false.
        return vectorStore.parseArrayValue(value).some((v)=>metadata[key] === v);
    },
    [vectorStore.FilterOperator.NIN]: ({ key, value }, metadata)=>{
        // Same reasoning as IN: `!find(...)` would wrongly report "not in" for a
        // matched 0 or "".
        return !vectorStore.parseArrayValue(value).some((v)=>metadata[key] === v);
    },
    [vectorStore.FilterOperator.ANY]: ({ key, value }, metadata)=>{
        if (!Array.isArray(metadata[key])) return false;
        return vectorStore.parseArrayValue(value).some((v)=>metadata[key].includes(v));
    },
    [vectorStore.FilterOperator.ALL]: ({ key, value }, metadata)=>{
        if (!Array.isArray(metadata[key])) return false;
        return vectorStore.parseArrayValue(value).every((v)=>metadata[key].includes(v));
    },
    [vectorStore.FilterOperator.TEXT_MATCH]: ({ key, value }, metadata)=>{
        // Relies on metadata[key] exposing `.includes` (string or array); other
        // value types will throw here.
        return metadata[key].includes(vectorStore.parsePrimitiveValue(value));
    },
    [vectorStore.FilterOperator.CONTAINS]: ({ key, value }, metadata)=>{
        if (!Array.isArray(metadata[key])) return false;
        // Strict-equality membership test that also works when `value` is 0 or "".
        return vectorStore.parseArrayValue(metadata[key]).some((v)=>v === value);
    },
    [vectorStore.FilterOperator.GT]: ({ key, value }, metadata)=>{
        return metadata[key] > vectorStore.parsePrimitiveValue(value);
    },
    [vectorStore.FilterOperator.LT]: ({ key, value }, metadata)=>{
        return metadata[key] < vectorStore.parsePrimitiveValue(value);
    },
    [vectorStore.FilterOperator.GTE]: ({ key, value }, metadata)=>{
        return metadata[key] >= vectorStore.parsePrimitiveValue(value);
    },
    [vectorStore.FilterOperator.LTE]: ({ key, value }, metadata)=>{
        return metadata[key] <= vectorStore.parsePrimitiveValue(value);
    }
};
// Evaluates the preFilters against one node's metadata.
// Despite the name, this returns a boolean (whether the node passes), not a function.
//   - no preFilters           -> true (nothing to filter on)
//   - preFilters, no metadata -> false
//   - otherwise every filter must pass for condition "and" (the default),
//     and at least one must pass for any other condition.
const buildFilterFn = (metadata, preFilters)=>{
    if (!preFilters) {
        return true;
    }
    if (!metadata) {
        return false;
    }
    const passes = (filter)=>{
        const current = metadata[filter.key];
        if (filter.operator === vectorStore.FilterOperator.IS_EMPTY) {
            // `is_empty` passes when the key is absent or holds null, "" or an empty array
            if (current === undefined || current === null || current === "") {
                return true;
            }
            return Array.isArray(current) && current.length === 0;
        }
        if (current === undefined) {
            // every other operator fails when the metadata key is not present
            return false;
        }
        const predicate = OPERATOR_TO_FILTER[filter.operator];
        if (!predicate) {
            throw new Error(`Unsupported operator: ${filter.operator}`);
        }
        return predicate(filter, metadata);
    };
    const mode = preFilters.condition || "and"; // default to and
    const list = preFilters.filters;
    return mode === "and" ? list.every(passes) : list.some(passes);
};
/**
 * Plain container for the three dictionaries a SimpleVectorStore works with.
 * Every instance starts with its own empty objects.
 */
class SimpleVectorStoreData {
    // node id -> embedding
    embeddingDict = {};
    // node id -> id of the node's source (ref doc) node
    textIdToRefDocId = {};
    // node id -> metadata used for query filtering
    metadataDict = {};
}
/**
 * Vector store that keeps embeddings, node-to-ref-doc links and node metadata
 * in a SimpleVectorStoreData object and can persist that object as JSON.
 */
class SimpleVectorStore extends vectorStore.BaseVectorStore {
    /**
     * @param init Optional init object forwarded to the base class. When
     * `init.data` is truthy it is used as the backing data; otherwise a new
     * SimpleVectorStoreData is created.
     */
    constructor(init){
        super(init);
        this.storesText = false;
        this.data = init?.data || new SimpleVectorStoreData();
    }
    /**
     * Loads a store from "vector_store.json" inside `persistDir`.
     * @param persistDir Directory holding the persisted file.
     * @param embedModel Forwarded to the constructor.
     * @param options Optional; `options.logger` is used for the "new store" message.
     */
    static async fromPersistDir(persistDir = global.DEFAULT_PERSIST_DIR, embedModel, options) {
        const persistPath = env.path.join(persistDir, "vector_store.json");
        return await SimpleVectorStore.fromPersistPath(persistPath, embedModel, options);
    }
    /** This store has no client object; always returns null. */
    client() {
        return null;
    }
    /**
     * @param textId Node id.
     * @returns The embedding stored under `textId` (undefined when absent).
     */
    async get(textId) {
        return this.data.embeddingDict[textId];
    }
    /**
     * Stores the embedding of every node; for nodes that have a source node it
     * also records the source node id and the node's metadata. Persists
     * afterwards when `persistPath` is set.
     * @param embeddingResults Nodes to add.
     * @returns The ids of the added nodes, in input order.
     */
    async add(embeddingResults) {
        for (const node of embeddingResults){
            this.data.embeddingDict[node.id_] = node.getEmbedding();
            // Nodes without a source node get an embedding only (no ref-doc link, no metadata).
            if (!node.sourceNode) {
                continue;
            }
            this.data.textIdToRefDocId[node.id_] = node.sourceNode?.nodeId;
            // Add metadata to the metadataDict
            const metadata = vectorStore.nodeToMetadata(node, true, undefined, false);
            delete metadata["_node_content"];
            this.data.metadataDict[node.id_] = metadata;
        }
        if (this.persistPath) {
            await this.persist(this.persistPath);
        }
        return embeddingResults.map((result)=>result.id_);
    }
    /**
     * Removes every node whose recorded ref-doc id equals `refDocId`, then
     * persists when `persistPath` is set.
     * @param refDocId Id of the source document whose nodes should be removed.
     */
    async delete(refDocId) {
        const textIdsToDelete = Object.keys(this.data.textIdToRefDocId).filter((textId)=>this.data.textIdToRefDocId[textId] === refDocId);
        for (const textId of textIdsToDelete){
            delete this.data.embeddingDict[textId];
            delete this.data.textIdToRefDocId[textId];
            if (this.data.metadataDict) delete this.data.metadataDict[textId];
        }
        if (this.persistPath) {
            await this.persist(this.persistPath);
        }
    }
    /**
     * Selects the stored nodes that match `query.docIds` (when given) and
     * `query.filters` (when given).
     * @param query Query object; only `docIds` and `filters` are read here.
     * @returns `{ nodeIds, embeddings }` as parallel arrays.
     */
    async filterNodes(query) {
        // Build the id set once instead of once per stored node.
        const allowedIds = query.docIds ? new Set(query.docIds) : null;
        const matchesDocIds = (nodeId)=>allowedIds === null || allowedIds.has(nodeId);
        const matchesFilters = (nodeId)=>{
            // Optional chaining: data supplied without a metadataDict must not crash the lookup.
            const metadata = this.data.metadataDict?.[nodeId];
            return buildFilterFn(metadata, query.filters);
        };
        const queriedItems = Object.entries(this.data.embeddingDict).filter(([nodeId])=>matchesDocIds(nodeId) && matchesFilters(nodeId));
        const nodeIds = queriedItems.map(([nodeId])=>nodeId);
        const embeddings = queriedItems.map(([, embedding])=>embedding);
        return {
            nodeIds,
            embeddings
        };
    }
    /**
     * Runs a similarity query over the filtered nodes.
     * @param query Query object; reads `mode`, `queryEmbedding`, `similarityTopK`,
     * `mmrThreshold`, plus the fields used by filterNodes.
     * @returns `{ similarities, ids }` as produced by the top-k helper for the mode.
     * @throws Error for learner modes (not implemented) and for any mode other
     * than MMR or DEFAULT.
     */
    async query(query) {
        const { nodeIds, embeddings: embeddings$1 } = await this.filterNodes(query);
        const queryEmbedding = query.queryEmbedding;
        let topSimilarities;
        let topIds;
        if (LEARNER_MODES.has(query.mode)) {
            // fixme: unfinished
            throw new Error("Learner modes not implemented for SimpleVectorStore yet.");
        } else if (query.mode === MMR_MODE) {
            const mmrThreshold = query.mmrThreshold;
            [topSimilarities, topIds] = embeddings.getTopKMMREmbeddings(queryEmbedding, embeddings$1, null, query.similarityTopK, nodeIds, mmrThreshold);
        } else if (query.mode === vectorStore.VectorStoreQueryMode.DEFAULT) {
            [topSimilarities, topIds] = embeddings.getTopKEmbeddings(queryEmbedding, embeddings$1, query.similarityTopK, nodeIds);
        } else {
            throw new Error(`Invalid query mode: ${query.mode}`);
        }
        return {
            similarities: topSimilarities,
            ids: topIds
        };
    }
    /**
     * Writes this store's data as JSON.
     * @param persistPath Destination file; defaults to "vector_store.json" in the default persist dir.
     */
    async persist(persistPath = env.path.join(global.DEFAULT_PERSIST_DIR, "vector_store.json")) {
        await SimpleVectorStore.persistData(persistPath, this.data);
    }
    /**
     * Serializes `data` with JSON.stringify and writes it to `persistPath`,
     * creating the parent directory first when it does not exist.
     * @param persistPath Destination file.
     * @param data Object to serialize.
     */
    static async persistData(persistPath, data) {
        const dirPath = env.path.dirname(persistPath);
        if (!await exists(dirPath)) {
            // recursive: create missing intermediate directories too, matching fromPersistPath.
            await env.fs.mkdir(dirPath, {
                recursive: true
            });
        }
        await env.fs.writeFile(persistPath, JSON.stringify(data));
    }
    /**
     * Loads a store from a JSON file. The parent directory is created when
     * missing; a missing file yields an empty store. The returned store has
     * `persistPath` set to the given path.
     * @param persistPath File to read.
     * @param embedModel Forwarded to the constructor.
     * @param options Optional; `options.logger` replaces env.consoleLogger.
     * @throws Error (with the original error as `cause`) when the file exists
     * but cannot be read or parsed.
     */
    static async fromPersistPath(persistPath, embedModel, options) {
        const logger = options?.logger ?? env.consoleLogger;
        const dirPath = env.path.dirname(persistPath);
        if (!await exists(dirPath)) {
            await env.fs.mkdir(dirPath, {
                recursive: true
            });
        }
        let dataDict = {};
        if (!await exists(persistPath)) {
            logger.log(`Starting new store from path: ${persistPath}`);
        } else {
            try {
                const fileData = await env.fs.readFile(persistPath);
                dataDict = JSON.parse(fileData.toString());
            } catch (e) {
                throw new Error(`Failed to load data from path: ${persistPath}`, {
                    cause: e
                });
            }
        }
        const data = new SimpleVectorStoreData();
        // Any dictionary missing from the file falls back to an empty object.
        data.embeddingDict = dataDict.embeddingDict ?? {};
        data.textIdToRefDocId = dataDict.textIdToRefDocId ?? {};
        data.metadataDict = dataDict.metadataDict ?? {};
        const store = new SimpleVectorStore({
            data,
            embedModel
        });
        store.persistPath = persistPath;
        return store;
    }
    /**
     * Builds a store from a dictionary shaped like the output of toDict().
     * @param saveDict Object with `embeddingDict`, `textIdToRefDocId`, `metadataDict`.
     * @param embedModel Forwarded to the constructor.
     */
    static fromDict(saveDict, embedModel) {
        const data = new SimpleVectorStoreData();
        // Same fallbacks as fromPersistPath, so a dict lacking a dictionary
        // (e.g. no metadataDict) still produces a usable store.
        data.embeddingDict = saveDict.embeddingDict ?? {};
        data.textIdToRefDocId = saveDict.textIdToRefDocId ?? {};
        data.metadataDict = saveDict.metadataDict ?? {};
        return new SimpleVectorStore({
            data,
            embedModel
        });
    }
    /**
     * @returns An object referencing (not copying) the three data dictionaries.
     */
    toDict() {
        return {
            embeddingDict: this.data.embeddingDict,
            textIdToRefDocId: this.data.textIdToRefDocId,
            metadataDict: this.data.metadataDict
        };
    }
}
/**
 * Assembles a storage context, filling in any store the caller did not supply.
 *
 * Without `persistDir`, missing stores are created fresh. With `persistDir`,
 * missing stores are loaded from that directory, and the vector store is given
 * `Settings.embedModel`. In both cases a text vector store is only added when
 * `vectorStores` has no entry under `schema.ModalityType.TEXT`.
 *
 * Note: a caller-supplied `vectorStores` object is mutated and returned as-is.
 *
 * @param docStore Optional document store.
 * @param indexStore Optional index store.
 * @param vectorStore Optional vector store used for the text entry when one is needed.
 * @param vectorStores Optional map of modality to vector store.
 * @param persistDir Optional directory to load missing stores from.
 * @returns `{ docStore, indexStore, vectorStores }`.
 */
async function storageContextFromDefaults({ docStore, indexStore: indexStore$1, vectorStore, vectorStores, persistDir } = {}) {
    vectorStores = vectorStores ?? {};
    // One key for both the membership check and the assignment, in both
    // branches. The persist branch used to check schema.ObjectType.TEXT, which
    // is only equivalent if that enum shares the same string value.
    const textKey = schema.ModalityType.TEXT;
    if (!persistDir) {
        docStore = docStore ?? new SimpleDocumentStore();
        indexStore$1 = indexStore$1 ?? new indexStore.SimpleIndexStore();
        if (!(textKey in vectorStores)) {
            vectorStores[textKey] = vectorStore ?? new SimpleVectorStore();
        }
    } else {
        const embedModel = Settings.embedModel;
        docStore = docStore || await SimpleDocumentStore.fromPersistDir(persistDir, global.DEFAULT_NAMESPACE);
        indexStore$1 = indexStore$1 || await indexStore.SimpleIndexStore.fromPersistDir(persistDir);
        if (!(textKey in vectorStores)) {
            vectorStores[textKey] = vectorStore ?? await SimpleVectorStore.fromPersistDir(persistDir, embedModel);
        }
    }
    return {
        docStore,
        indexStore: indexStore$1,
        vectorStores
    };
}
// Public API defined in this module.
exports.SimpleDocumentStore = SimpleDocumentStore;
exports.exists = exists;
exports.storageContextFromDefaults = storageContextFromDefaults;
exports.walk = walk;
// Re-exports: each loop below exposes every enumerable key of one required
// module (except 'default') as an enumerable getter on `exports`, skipping
// names `exports` already owns — so names defined above, or re-exported by an
// earlier loop, take precedence. The getters read from the source module on
// each access.
// NOTE(review): this loop shape looks bundler-generated; keep it verbatim in
// case tooling pattern-matches it for named-export detection — confirm before restyling.
Object.keys(chatStore).forEach(function (k) {
if (k !== 'default' && !Object.prototype.hasOwnProperty.call(exports, k)) Object.defineProperty(exports, k, {
enumerable: true,
get: function () { return chatStore[k]; }
});
});
// Same re-export for the doc-store module.
Object.keys(docStore).forEach(function (k) {
if (k !== 'default' && !Object.prototype.hasOwnProperty.call(exports, k)) Object.defineProperty(exports, k, {
enumerable: true,
get: function () { return docStore[k]; }
});
});
// Same re-export for the index-store module.
Object.keys(indexStore).forEach(function (k) {
if (k !== 'default' && !Object.prototype.hasOwnProperty.call(exports, k)) Object.defineProperty(exports, k, {
enumerable: true,
get: function () { return indexStore[k]; }
});
});
// Same re-export for the kv-store module.
Object.keys(kvStore).forEach(function (k) {
if (k !== 'default' && !Object.prototype.hasOwnProperty.call(exports, k)) Object.defineProperty(exports, k, {
enumerable: true,
get: function () { return kvStore[k]; }
});
});