@llamaindex/core
Version:
LlamaIndex Core Module
248 lines (243 loc) • 8.9 kB
JavaScript
Object.defineProperty(exports, '__esModule', { value: true });
var env = require('@llamaindex/env');
var index_cjs = require('../../../global/dist/index.cjs');
var index_cjs$1 = require('../../../schema/dist/index.cjs');
// Keys of the persisted document envelope: TYPE_KEY holds the document's
// type tag and DATA_KEY holds the serialized document payload
// (written by docToJson, read by jsonToDoc / isValidDocJson).
const TYPE_KEY = "__type__";
const DATA_KEY = "__data__";
/**
 * Serializer that stores values as JSON text.
 *
 * - `toPersistence(data)` returns `JSON.stringify(data)`.
 * - `fromPersistence(data)` returns `JSON.parse(data)` and therefore throws a
 *   `SyntaxError` on malformed input.
 */
const jsonSerializer = {
    toPersistence: (data) => JSON.stringify(data),
    fromPersistence: (data) => JSON.parse(data)
};
/**
 * Pass-through serializer: both directions hand back the very same value they
 * were given, without copying or transforming it.
 */
const noneSerializer = {
    toPersistence: (data) => data,
    fromPersistence: (data) => data
};
/**
 * Tells whether a value looks like a persisted document envelope.
 *
 * @param {*} docJson - Candidate value read back from storage.
 * @returns {boolean} `true` only for a non-null object whose TYPE_KEY and
 *   DATA_KEY entries are both different from `undefined`.
 */
function isValidDocJson(docJson) {
    // `typeof null` is "object", so null has to be excluded explicitly.
    if (docJson === null || typeof docJson !== "object") {
        return false;
    }
    // `every` stops at the first missing key, so the type key is checked first.
    return [TYPE_KEY, DATA_KEY].every((requiredKey) => docJson[requiredKey] !== undefined);
}
/**
 * Wraps a document in the persisted envelope format.
 *
 * @param {object} doc - Object exposing `toJSON()` and a `type` property.
 * @param {object} serializer - Object exposing `toPersistence(value)`.
 * @returns {object} Envelope with the serialized payload under DATA_KEY and
 *   the document's type under TYPE_KEY (in that key order).
 */
function docToJson(doc, serializer) {
    const payload = serializer.toPersistence(doc.toJSON());
    const envelope = {};
    envelope[DATA_KEY] = payload;
    envelope[TYPE_KEY] = doc.type;
    return envelope;
}
/**
 * Rebuilds a document/node instance from its persisted envelope.
 *
 * The payload under DATA_KEY is first deserialized with the given serializer,
 * then the type tag under TYPE_KEY selects which class to instantiate.
 * Both supported types now restore the persisted `embedding`; previously only
 * the DOCUMENT branch did, so a text node silently lost its embedding on a
 * store round trip.
 *
 * @param {object} docDict - Envelope carrying TYPE_KEY and DATA_KEY entries.
 * @param {object} serializer - Object exposing `fromPersistence(value)`.
 * @returns {object} A `Document` for ObjectType.DOCUMENT, or a `TextNode`
 *   (additionally carrying `relationships`) for ObjectType.TEXT.
 * @throws {Error} When the type tag is neither DOCUMENT nor TEXT. Note that
 *   deserialization runs first, so serializer errors surface before this one.
 */
function jsonToDoc(docDict, serializer) {
    const docType = docDict[TYPE_KEY];
    // fixme: zod type check this
    const dataDict = serializer.fromPersistence(docDict[DATA_KEY]);
    // Fields common to every supported node type.
    const sharedFields = {
        text: dataDict.text,
        id_: dataDict.id_,
        embedding: dataDict.embedding,
        hash: dataDict.hash,
        metadata: dataDict.metadata
    };
    if (docType === index_cjs$1.ObjectType.DOCUMENT) {
        return new index_cjs$1.Document(sharedFields);
    }
    if (docType === index_cjs$1.ObjectType.TEXT) {
        return new index_cjs$1.TextNode({
            ...sharedFields,
            relationships: dataDict.relationships
        });
    }
    throw new Error(`Unknown doc type: ${docType}`);
}
// Default value of BaseDocumentStore.persist's `persistPath` parameter: the
// default persist directory joined with the default docstore filename, both
// taken from the global module.
const DEFAULT_PERSIST_PATH = env.path.join(index_cjs.DEFAULT_PERSIST_DIR, index_cjs.DEFAULT_DOC_STORE_PERSIST_FILENAME);
/**
 * Base class for document stores.
 *
 * It provides the node-oriented helpers (`getNode`, `getNodes`,
 * `getNodeDict`) on top of a `getDocument(docId, raiseError)` method that is
 * not defined here and must be supplied by the subclass.
 */
class BaseDocumentStore {
    constructor() {
        // Serializer used to convert documents to/from their persisted form.
        this.serializer = jsonSerializer;
    }

    // Save/load
    /**
     * Persist the docstore to a file. The base implementation does nothing.
     *
     * @param {string} [persistPath=DEFAULT_PERSIST_PATH] - Target file path.
     */
    persist(persistPath = DEFAULT_PERSIST_PATH) {
        // Persist the docstore to a file.
    }

    // Nodes
    /**
     * Fetches several nodes concurrently.
     *
     * @param {string[]} nodeIds - Ids to look up.
     * @param {boolean} [raiseError=true] - Forwarded to `getNode`.
     * @returns {Promise<Array>} Nodes in the same order as `nodeIds`; entries
     *   are `undefined` for missing ids when `raiseError` is false.
     */
    getNodes(nodeIds, raiseError = true) {
        return Promise.all(nodeIds.map((nodeId) => this.getNode(nodeId, raiseError)));
    }

    /**
     * Fetches a single node.
     *
     * @param {string} nodeId - Id to look up.
     * @param {boolean} [raiseError=true] - Forwarded to `getDocument`. When
     *   false, a missing document yields `undefined` instead of an error.
     * @returns {Promise<object|undefined>} The stored node.
     * @throws {Error} When the stored value is not a `BaseNode`, or when the
     *   document is missing and `raiseError` is true.
     */
    async getNode(nodeId, raiseError = true) {
        const doc = await this.getDocument(nodeId, raiseError);
        // Without this guard the instanceof check below would throw for a
        // missing document even though the caller asked for no error.
        if (!raiseError && (doc === undefined || doc === null)) {
            return undefined;
        }
        if (!(doc instanceof index_cjs$1.BaseNode)) {
            throw new Error(`Document ${nodeId} is not a Node.`);
        }
        return doc;
    }

    /**
     * Resolves every node id in a dictionary, one after another.
     *
     * @param {Object<string, string>} nodeIdDict - Maps arbitrary keys to node ids.
     * @returns {Promise<Object<string, object>>} Same keys, mapped to the nodes.
     * @throws {Error} Propagated from `getNode` (called with its default
     *   `raiseError = true`).
     */
    async getNodeDict(nodeIdDict) {
        const result = {};
        for (const index in nodeIdDict) {
            result[index] = await this.getNode(nodeIdDict[index]);
        }
        return result;
    }
}
/**
 * Document store backed by a key-value store.
 *
 * Three collections are derived from the namespace:
 *  - `<namespace>/data`          serialized documents, keyed by doc id
 *  - `<namespace>/ref_doc_info`  `{ nodeIds, extraInfo }`, keyed by source doc id
 *  - `<namespace>/metadata`      `{ docHash, refDocId? }`, keyed by doc id
 *
 * The kvstore is used through `get(key, collection)`,
 * `put(key, value, collection)`, `getAll(collection)` and
 * `delete(key, collection)`.
 */
class KVDocumentStore extends BaseDocumentStore {
    /**
     * @param {object} kvstore - Key-value store holding all collections.
     * @param {string} [namespace=DEFAULT_NAMESPACE] - Prefix for collection names.
     */
    constructor(kvstore, namespace = index_cjs.DEFAULT_NAMESPACE) {
        super();
        this.kvstore = kvstore;
        this.nodeCollection = `${namespace}/data`;
        this.refDocCollection = `${namespace}/ref_doc_info`;
        this.metadataCollection = `${namespace}/metadata`;
    }

    /**
     * Loads every stored document.
     *
     * @returns {Promise<Object<string, object>>} Doc id -> document. Entries
     *   that are not valid document envelopes are skipped with a warning.
     */
    async docs() {
        const jsonDict = await this.kvstore.getAll(this.nodeCollection);
        const docs = {};
        for (const key in jsonDict) {
            const value = jsonDict[key];
            if (isValidDocJson(value)) {
                docs[key] = jsonToDoc(value, this.serializer);
            } else {
                console.warn(`Invalid JSON for docId ${key}`);
            }
        }
        return docs;
    }

    /**
     * Stores documents, together with their hash metadata and, for text nodes
     * that have a source node, the source document's ref doc info.
     *
     * @param {object[]} docs - Documents/nodes to store.
     * @param {boolean} [allowUpdate=true] - When false, an existing id is an error.
     * @throws {Error} When a document has no id, or when it already exists
     *   and `allowUpdate` is false.
     */
    async addDocuments(docs, allowUpdate = true) {
        for (const doc of docs) {
            // Reject undefined as well as null: either would be used as a key.
            if (this.isNil(doc.id_)) {
                throw new Error("doc_id not set");
            }
            if (!allowUpdate && await this.documentExists(doc.id_)) {
                throw new Error(`doc_id ${doc.id_} already exists. Set allow_update to True to overwrite.`);
            }
            const nodeKey = doc.id_;
            const data = docToJson(doc, this.serializer);
            await this.kvstore.put(nodeKey, data, this.nodeCollection);
            const metadata = {
                docHash: doc.hash
            };
            const sourceNode = doc.sourceNode;
            if (doc.type === index_cjs$1.ObjectType.TEXT && !this.isNil(sourceNode)) {
                const refDocId = sourceNode.nodeId;
                const existing = await this.getRefDocInfo(refDocId);
                const knownNodeIds = existing?.nodeIds ?? [];
                const refDocInfo = {
                    ...existing,
                    // Re-adding an already tracked node must not list it twice.
                    nodeIds: knownNodeIds.includes(doc.id_) ? knownNodeIds : [...knownNodeIds, doc.id_],
                    extraInfo: existing?.extraInfo ?? {}
                };
                await this.kvstore.put(refDocId, refDocInfo, this.refDocCollection);
                metadata.refDocId = refDocId;
            }
            await this.kvstore.put(nodeKey, metadata, this.metadataCollection);
        }
    }

    /**
     * Loads one document.
     *
     * @param {string} docId - Id to look up.
     * @param {boolean} [raiseError=true] - When false, a missing id yields `undefined`.
     * @returns {Promise<object|undefined>} The reconstructed document.
     * @throws {Error} When the id is missing (and `raiseError` is true) or
     *   the stored value is not a valid document envelope.
     */
    async getDocument(docId, raiseError = true) {
        const json = await this.kvstore.get(docId, this.nodeCollection);
        if (this.isNil(json)) {
            if (raiseError) {
                throw new Error(`docId ${docId} not found.`);
            }
            return;
        }
        if (!isValidDocJson(json)) {
            throw new Error(`Invalid JSON for docId ${docId}`);
        }
        return jsonToDoc(json, this.serializer);
    }

    /**
     * @param {string} refDocId - Source document id.
     * @returns {Promise<object|undefined>} A deep copy of the stored ref doc
     *   info, so callers may mutate it freely; `undefined` when absent.
     */
    async getRefDocInfo(refDocId) {
        const refDocInfo = await this.kvstore.get(refDocId, this.refDocCollection);
        return refDocInfo ? structuredClone(refDocInfo) : undefined;
    }

    /**
     * @returns {Promise<object|undefined>} Everything in the ref doc
     *   collection, or `undefined` when the kvstore returns null/undefined.
     */
    async getAllRefDocInfo() {
        const refDocInfos = await this.kvstore.getAll(this.refDocCollection);
        if (this.isNil(refDocInfos)) {
            return;
        }
        return refDocInfos;
    }

    /**
     * @param {string} refDocId - Source document id.
     * @returns {Promise<boolean>} Whether ref doc info exists for the id.
     */
    async refDocExists(refDocId) {
        return !this.isNil(await this.getRefDocInfo(refDocId));
    }

    /**
     * @param {string} docId - Document id.
     * @returns {Promise<boolean>} Whether the node collection has the id.
     */
    async documentExists(docId) {
        return !this.isNil(await this.kvstore.get(docId, this.nodeCollection));
    }

    /**
     * Detaches a node from the ref doc info of its source document.
     *
     * The node id is removed from the source's `nodeIds`; the updated record
     * is written back while other nodes remain, otherwise the ref doc info
     * entry is deleted. (Previously the record was written back unchanged, so
     * deleted nodes stayed listed forever.)
     *
     * @param {string} docId - Id of the node being removed.
     */
    async removeRefDocNode(docId) {
        const metadata = await this.kvstore.get(docId, this.metadataCollection);
        // A missing key may come back as undefined as well as null.
        if (this.isNil(metadata)) {
            return;
        }
        const refDocId = metadata.refDocId;
        if (this.isNil(refDocId)) {
            return;
        }
        const refDocInfo = await this.kvstore.get(refDocId, this.refDocCollection);
        if (this.isNil(refDocInfo)) {
            return;
        }
        // Build a new array/object instead of mutating what the kvstore returned.
        const remainingNodeIds = (refDocInfo.nodeIds ?? []).filter((nodeId) => nodeId !== docId);
        if (remainingNodeIds.length > 0) {
            await this.kvstore.put(refDocId, { ...refDocInfo, nodeIds: remainingNodeIds }, this.refDocCollection);
        } else {
            await this.kvstore.delete(refDocId, this.refDocCollection);
        }
        // Unchanged from the previous behavior: the metadata entry keyed by the
        // source document id is dropped whenever its ref doc info existed.
        await this.kvstore.delete(refDocId, this.metadataCollection);
    }

    /**
     * Deletes a document and its metadata.
     *
     * @param {string} docId - Id to delete.
     * @param {boolean} [raiseError=true] - Throw when the kvstore reports
     *   that nothing was deleted from the node collection.
     * @param {boolean} [removeRefDocNode=true] - Also detach the node from
     *   its source document's ref doc info.
     * @throws {Error} When the document was not found and `raiseError` is true.
     */
    async deleteDocument(docId, raiseError = true, removeRefDocNode = true) {
        if (removeRefDocNode) {
            await this.removeRefDocNode(docId);
        }
        const deleteSuccess = await this.kvstore.delete(docId, this.nodeCollection);
        await this.kvstore.delete(docId, this.metadataCollection);
        if (!deleteSuccess && raiseError) {
            throw new Error(`doc_id ${docId} not found.`);
        }
    }

    /**
     * Deletes a source document's bookkeeping together with all nodes listed
     * in its ref doc info.
     *
     * @param {string} refDocId - Source document id.
     * @param {boolean} [raiseError=true] - Throw when no ref doc info exists.
     * @throws {Error} When the ref doc is unknown and `raiseError` is true.
     */
    async deleteRefDoc(refDocId, raiseError = true) {
        const refDocInfo = await this.getRefDocInfo(refDocId);
        if (this.isNil(refDocInfo)) {
            if (raiseError) {
                throw new Error(`ref_doc_id ${refDocId} not found.`);
            }
            return;
        }
        for (const docId of refDocInfo.nodeIds) {
            // The whole ref doc entry is deleted below, so per-node detaching
            // is skipped, and already-missing nodes are tolerated.
            await this.deleteDocument(docId, false, false);
        }
        await this.kvstore.delete(refDocId, this.metadataCollection);
        await this.kvstore.delete(refDocId, this.refDocCollection);
    }

    /**
     * Overwrites the metadata entry of a document with `{ docHash }`.
     *
     * @param {string} docId - Document id.
     * @param {string} docHash - Hash to record.
     */
    async setDocumentHash(docId, docHash) {
        await this.kvstore.put(docId, { docHash }, this.metadataCollection);
    }

    /**
     * @param {string} docId - Document id.
     * @returns {Promise<string|undefined>} The recorded hash, if any.
     */
    async getDocumentHash(docId) {
        const metadata = await this.kvstore.get(docId, this.metadataCollection);
        return metadata?.docHash;
    }

    /**
     * @returns {Promise<Object<string, string>>} Map of hash -> doc id for
     *   every metadata entry that has a truthy `docHash`.
     */
    async getAllDocumentHashes() {
        const metadataDocs = await this.kvstore.getAll(this.metadataCollection);
        const hashes = {};
        // getAll already returned every metadata record; read the hash from it
        // instead of issuing one extra kvstore lookup per document.
        for (const [docId, metadata] of Object.entries(metadataDocs ?? {})) {
            const hash = metadata?.docHash;
            if (hash) {
                hashes[hash] = docId;
            }
        }
        return hashes;
    }

    /**
     * @param {*} value - Any value.
     * @returns {boolean} `true` for `null` and `undefined` only.
     */
    isNil(value) {
        return value === null || value === undefined;
    }
}
// Public CommonJS exports of this module.
exports.BaseDocumentStore = BaseDocumentStore;
exports.KVDocumentStore = KVDocumentStore;
exports.docToJson = docToJson;
exports.isValidDocJson = isValidDocJson;
exports.jsonSerializer = jsonSerializer;
exports.jsonToDoc = jsonToDoc;
exports.noneSerializer = noneSerializer;