llamaindex
Version:
<p align="center"> <img height="100" width="100" alt="LlamaIndex logo" src="https://ts.llamaindex.ai/square.svg" /> </p> <h1 align="center">LlamaIndex.TS</h1> <h3 align="center"> Data framework for your LLM application. </h3>
248 lines (244 loc) • 10 kB
JavaScript
import { VectorStoreQueryMode, FilterOperator, parsePrimitiveValue, parseArrayValue, BaseVectorStore, nodeToMetadata } from '@llamaindex/core/vector-store';
export * from '@llamaindex/core/vector-store';
import { getTopKMMREmbeddings, getTopKEmbeddings } from '@llamaindex/core/embeddings';
import { DEFAULT_PERSIST_DIR } from '@llamaindex/core/global';
import { fs, path, consoleLogger } from '@llamaindex/env';
// FS utility helpers
/**
 * Reports whether a filesystem entry is reachable at the given path.
 * Mirrors the behaviour of Python's `os.path.exists`.
 * @param path The path to probe.
 * @returns A promise resolving to `true` when `fs.access` succeeds and to
 * `false` when it fails for any reason.
 */
async function exists(path) {
  let found = true;
  try {
    await fs.access(path);
  } catch {
    // Any access failure is reported as "does not exist".
    found = false;
  }
  return found;
}
// Query mode that selects the MMR retrieval branch in `SimpleVectorStore.query`.
const MMR_MODE = VectorStoreQueryMode.MMR;
// Query modes that `SimpleVectorStore.query` rejects as not implemented.
const LEARNER_MODES = new Set()
  .add(VectorStoreQueryMode.SVM)
  .add(VectorStoreQueryMode.LINEAR_REGRESSION)
  .add(VectorStoreQueryMode.LOGISTIC_REGRESSION);
// Mapping of filter operators to metadata filter functions.
// Each predicate takes a filter (`{ key, value }`) and a node's metadata object and
// returns whether `metadata[key]` satisfies the filter. `buildFilterFn` only calls
// these when `metadata[key]` is not `undefined`; it may still be `null`.
const OPERATOR_TO_FILTER = {
  [FilterOperator.EQ]: ({ key, value }, metadata) => {
    return metadata[key] === parsePrimitiveValue(value);
  },
  [FilterOperator.NE]: ({ key, value }, metadata) => {
    return metadata[key] !== parsePrimitiveValue(value);
  },
  [FilterOperator.IN]: ({ key, value }, metadata) => {
    // `some` rather than `!!find(...)`: `find` returns the matched element, so a
    // match on a falsy value (0, "", false) would otherwise read as "not found".
    return parseArrayValue(value).some((candidate) => metadata[key] === candidate);
  },
  [FilterOperator.NIN]: ({ key, value }, metadata) => {
    // Same falsy-match pitfall as IN, inverted.
    return !parseArrayValue(value).some((candidate) => metadata[key] === candidate);
  },
  [FilterOperator.ANY]: ({ key, value }, metadata) => {
    if (!Array.isArray(metadata[key])) return false;
    return parseArrayValue(value).some((v) => metadata[key].includes(v));
  },
  [FilterOperator.ALL]: ({ key, value }, metadata) => {
    if (!Array.isArray(metadata[key])) return false;
    return parseArrayValue(value).every((v) => metadata[key].includes(v));
  },
  [FilterOperator.TEXT_MATCH]: ({ key, value }, metadata) => {
    // Parse first so any validation done by parsePrimitiveValue still runs.
    const needle = parsePrimitiveValue(value);
    const haystack = metadata[key];
    // Only strings and arrays have `includes`; any other stored value (number,
    // null, boolean, ...) cannot match instead of raising a TypeError.
    if (typeof haystack !== "string" && !Array.isArray(haystack)) return false;
    return haystack.includes(needle);
  },
  [FilterOperator.CONTAINS]: ({ key, value }, metadata) => {
    if (!Array.isArray(metadata[key])) return false;
    // `some` rather than `!!find(...)` so that containing a falsy value counts.
    return parseArrayValue(metadata[key]).some((element) => element === value);
  },
  [FilterOperator.GT]: ({ key, value }, metadata) => {
    return metadata[key] > parsePrimitiveValue(value);
  },
  [FilterOperator.LT]: ({ key, value }, metadata) => {
    return metadata[key] < parsePrimitiveValue(value);
  },
  [FilterOperator.GTE]: ({ key, value }, metadata) => {
    return metadata[key] >= parsePrimitiveValue(value);
  },
  [FilterOperator.LTE]: ({ key, value }, metadata) => {
    return metadata[key] <= parsePrimitiveValue(value);
  }
};
/**
 * Evaluates a node's metadata against the metadata filters of a query.
 * Despite its name, this returns the boolean verdict itself, not a function.
 * @param metadata The metadata object stored for one node (may be undefined).
 * @param preFilters Optional `{ filters, condition }`; `condition` defaults to "and",
 * and any value other than "and" is treated as "match at least one filter".
 * @returns `true` when the node passes the filters, `false` otherwise.
 * @throws Error `Unsupported operator: …` when a filter names an operator that has
 * no entry in `OPERATOR_TO_FILTER` (and is not `is_empty`).
 */
const buildFilterFn = (metadata, preFilters) => {
  const filters = preFilters?.filters;
  // Nothing to filter on (no filter object, or a missing/empty filter list): every
  // node passes. Without this guard an empty list combined with the "or" condition
  // would reject every node, and a node lacking metadata would be rejected even
  // though no filter was requested.
  if (!filters || filters.length === 0) return true;
  if (!metadata) return false;
  const queryCondition = preFilters.condition || "and"; // default to and
  const itemFilterFn = (filter) => {
    if (filter.operator === FilterOperator.IS_EMPTY) {
      // for `is_empty` operator, return true if the metadata key is not present or the value is empty
      const value = metadata[filter.key];
      return value === undefined || value === null || value === "" || (Array.isArray(value) && value.length === 0);
    }
    if (metadata[filter.key] === undefined) {
      // for other operators, always return false if the metadata key is not present
      return false;
    }
    const metadataLookupFn = OPERATOR_TO_FILTER[filter.operator];
    if (!metadataLookupFn) throw new Error(`Unsupported operator: ${filter.operator}`);
    return metadataLookupFn(filter, metadata);
  };
  if (queryCondition === "and") return filters.every(itemFilterFn);
  return filters.some(itemFilterFn);
};
/**
 * Plain container for the three dictionaries a SimpleVectorStore keeps:
 * `embeddingDict`, `textIdToRefDocId` and `metadataDict`.
 * Every instance starts with its own empty object for each of them.
 */
class SimpleVectorStoreData {
  constructor() {
    for (const field of ["embeddingDict", "textIdToRefDocId", "metadataDict"]) {
      this[field] = {};
    }
  }
}
/**
 * Vector store that keeps embeddings, node-to-source-document links and node
 * metadata in plain in-memory dictionaries (`this.data`), optionally mirrored
 * to a JSON file when `persistPath` is set.
 */
class SimpleVectorStore extends BaseVectorStore {
  /**
   * @param init Optional settings forwarded to `BaseVectorStore`. When `init.data`
   * is provided it becomes the backing data; otherwise a fresh, empty
   * `SimpleVectorStoreData` is created.
   */
  constructor(init) {
    super(init);
    this.storesText = false;
    this.data = init?.data || new SimpleVectorStoreData();
  }

  /**
   * Loads a store from `<persistDir>/vector_store.json`.
   * @param persistDir Directory holding the store file (defaults to DEFAULT_PERSIST_DIR).
   * @param embedModel Passed through to the created store.
   * @param options Passed through to `fromPersistPath` (e.g. `logger`).
   * @returns A promise resolving to the loaded store.
   */
  static async fromPersistDir(persistDir = DEFAULT_PERSIST_DIR, embedModel, options) {
    const persistPath = path.join(persistDir, "vector_store.json");
    return await SimpleVectorStore.fromPersistPath(persistPath, embedModel, options);
  }

  /** This store has no underlying client; always returns `null`. */
  client() {
    return null;
  }

  /**
   * @param textId Id of a stored node.
   * @returns The embedding stored under `textId`, or `undefined` if there is none.
   */
  async get(textId) {
    return this.data.embeddingDict[textId];
  }

  /**
   * Stores the embedding of every given node and, for nodes that have a source
   * node, the source link and metadata. Persists afterwards when `persistPath` is set.
   * @param embeddingResults Nodes exposing `id_`, `getEmbedding()` and `sourceNode`.
   * @returns The ids of the given nodes, in input order.
   */
  async add(embeddingResults) {
    for (const node of embeddingResults) {
      this.data.embeddingDict[node.id_] = node.getEmbedding();
      const source = node.sourceNode;
      // NOTE(review): nodes without a source node get no metadata entry, so they can
      // never satisfy a metadata-filtered query — confirm this is intended.
      if (!source) {
        continue;
      }
      this.data.textIdToRefDocId[node.id_] = source.nodeId;
      // Add metadata to the metadataDict
      const metadata = nodeToMetadata(node, true, undefined, false);
      delete metadata["_node_content"];
      // Data supplied by a caller may lack `metadataDict` (`delete` already allows
      // for that), so create it on demand instead of throwing.
      if (!this.data.metadataDict) {
        this.data.metadataDict = {};
      }
      this.data.metadataDict[node.id_] = metadata;
    }
    if (this.persistPath) {
      await this.persist(this.persistPath);
    }
    return embeddingResults.map((result) => result.id_);
  }

  /**
   * Removes every node whose recorded source document id equals `refDocId`,
   * then persists when `persistPath` is set.
   * @param refDocId Source document id whose nodes should be removed.
   */
  async delete(refDocId) {
    const textIdsToDelete = Object.keys(this.data.textIdToRefDocId).filter(
      (textId) => this.data.textIdToRefDocId[textId] === refDocId
    );
    for (const textId of textIdsToDelete) {
      delete this.data.embeddingDict[textId];
      delete this.data.textIdToRefDocId[textId];
      if (this.data.metadataDict) delete this.data.metadataDict[textId];
    }
    if (this.persistPath) {
      await this.persist(this.persistPath);
    }
  }

  /**
   * Selects the stored nodes eligible for a query: those listed in `query.docIds`
   * (when given) whose metadata passes `query.filters` (when given).
   * @param query Query object; only `docIds` and `filters` are read here.
   * @returns `{ nodeIds, embeddings }` as parallel arrays.
   */
  async filterNodes(query) {
    // Build the id lookup once rather than once per stored node.
    const allowedIds = query.docIds ? new Set(query.docIds) : null;
    const metadataDict = this.data.metadataDict;
    const isEligible = ([nodeId]) =>
      (allowedIds === null || allowedIds.has(nodeId)) &&
      // `?.` tolerates data without a metadataDict, matching the guard in `delete`.
      buildFilterFn(metadataDict?.[nodeId], query.filters);
    const queriedItems = Object.entries(this.data.embeddingDict).filter(isEligible);
    return {
      nodeIds: queriedItems.map(([nodeId]) => nodeId),
      embeddings: queriedItems.map(([, embedding]) => embedding)
    };
  }

  /**
   * Runs a similarity query over the eligible nodes.
   * @param query Reads `queryEmbedding`, `mode`, `similarityTopK`, `mmrThreshold`,
   * plus the fields used by `filterNodes`.
   * @returns `{ similarities, ids }` as produced by the top-k helper for the mode.
   * @throws Error for the learner modes (not implemented) and for any mode other
   * than MMR or DEFAULT.
   */
  async query(query) {
    const { nodeIds, embeddings } = await this.filterNodes(query);
    const queryEmbedding = query.queryEmbedding;
    let ranked;
    if (LEARNER_MODES.has(query.mode)) {
      // fixme: unfinished
      throw new Error("Learner modes not implemented for SimpleVectorStore yet.");
    } else if (query.mode === MMR_MODE) {
      const mmrThreshold = query.mmrThreshold;
      ranked = getTopKMMREmbeddings(queryEmbedding, embeddings, null, query.similarityTopK, nodeIds, mmrThreshold);
    } else if (query.mode === VectorStoreQueryMode.DEFAULT) {
      ranked = getTopKEmbeddings(queryEmbedding, embeddings, query.similarityTopK, nodeIds);
    } else {
      throw new Error(`Invalid query mode: ${query.mode}`);
    }
    const [similarities, ids] = ranked;
    return {
      similarities,
      ids
    };
  }

  /**
   * Writes this store's data as JSON.
   * @param persistPath Target file (defaults to `<DEFAULT_PERSIST_DIR>/vector_store.json`).
   */
  async persist(persistPath = path.join(DEFAULT_PERSIST_DIR, "vector_store.json")) {
    await SimpleVectorStore.persistData(persistPath, this.data);
  }

  /**
   * Serialises `data` with `JSON.stringify` and writes it to `persistPath`,
   * creating the containing directory first when it does not exist.
   * @param persistPath Target file.
   * @param data The store data to write.
   */
  static async persistData(persistPath, data) {
    const dirPath = path.dirname(persistPath);
    if (!(await exists(dirPath))) {
      // `recursive` so that missing parent directories are created too, in line
      // with `fromPersistPath`.
      await fs.mkdir(dirPath, {
        recursive: true
      });
    }
    await fs.writeFile(persistPath, JSON.stringify(data));
  }

  /**
   * Creates a store bound to `persistPath`. Existing file content is loaded;
   * when the file is absent an empty store is started. The containing directory
   * is created if needed.
   * @param persistPath JSON file backing the store.
   * @param embedModel Passed to the store constructor.
   * @param options Optional `{ logger }`; defaults to `consoleLogger`.
   * @returns The store, with `persistPath` set so later changes are persisted.
   * @throws Error `Failed to load data from path: …` (with `cause`) when the file
   * exists but cannot be read or parsed.
   */
  static async fromPersistPath(persistPath, embedModel, options) {
    const logger = options?.logger ?? consoleLogger;
    const dirPath = path.dirname(persistPath);
    if (!(await exists(dirPath))) {
      await fs.mkdir(dirPath, {
        recursive: true
      });
    }
    let dataDict = {};
    if (!(await exists(persistPath))) {
      logger.log(`Starting new store from path: ${persistPath}`);
    } else {
      try {
        const fileData = await fs.readFile(persistPath);
        dataDict = JSON.parse(fileData.toString());
      } catch (e) {
        throw new Error(`Failed to load data from path: ${persistPath}`, {
          cause: e
        });
      }
    }
    // Any dictionary missing from the file falls back to an empty one.
    const data = new SimpleVectorStoreData();
    data.embeddingDict = dataDict.embeddingDict ?? {};
    data.textIdToRefDocId = dataDict.textIdToRefDocId ?? {};
    data.metadataDict = dataDict.metadataDict ?? {};
    const store = new SimpleVectorStore({
      data,
      embedModel
    });
    store.persistPath = persistPath;
    return store;
  }

  /**
   * Builds a store from a dictionary shaped like the result of `toDict`.
   * @param saveDict Object with `embeddingDict`, `textIdToRefDocId` and `metadataDict`.
   * @param embedModel Passed to the store constructor.
   * @returns A new store with no `persistPath` set.
   */
  static fromDict(saveDict, embedModel) {
    const data = new SimpleVectorStoreData();
    // Default missing dictionaries the same way `fromPersistPath` does, so a
    // dictionary lacking one of them does not break `add` / `filterNodes`.
    data.embeddingDict = saveDict.embeddingDict ?? {};
    data.textIdToRefDocId = saveDict.textIdToRefDocId ?? {};
    data.metadataDict = saveDict.metadataDict ?? {};
    return new SimpleVectorStore({
      data,
      embedModel
    });
  }

  /**
   * @returns An object exposing the store's three dictionaries. They are returned
   * by reference, not copied.
   */
  toDict() {
    return {
      embeddingDict: this.data.embeddingDict,
      textIdToRefDocId: this.data.textIdToRefDocId,
      metadataDict: this.data.metadataDict
    };
  }
}
export { SimpleVectorStore };