@llamaindex/core
Version: 
LlamaIndex Core Module
248 lines (243 loc) • 8.9 kB
JavaScript
Object.defineProperty(exports, '__esModule', { value: true });
var env = require('@llamaindex/env');
var index_cjs = require('../../../global/dist/index.cjs');
var index_cjs$1 = require('../../../schema/dist/index.cjs');
// Keys of the persisted document envelope built by docToJson:
// TYPE_KEY maps to the document's `type` tag and DATA_KEY maps to the
// serializer-encoded result of `doc.toJSON()`.
const TYPE_KEY = "__type__";
const DATA_KEY = "__data__";
/**
 * Serializer that persists values as JSON text.
 *
 * `toPersistence` encodes a value with `JSON.stringify`; `fromPersistence`
 * decodes a JSON string with `JSON.parse` (and therefore throws a
 * `SyntaxError` on malformed input).
 */
const jsonSerializer = {
    toPersistence: (data) => JSON.stringify(data),
    fromPersistence: (data) => JSON.parse(data)
};
/**
 * Pass-through serializer: both directions hand back the very same value
 * they were given, without copying or transforming it.
 */
const noneSerializer = {
    toPersistence: (data) => data,
    fromPersistence: (data) => data
};
/**
 * Checks whether a value has the shape of a persisted document envelope.
 *
 * @param {unknown} docJson - Candidate value read from storage.
 * @returns {boolean} `true` when `docJson` is a non-null object whose
 *   TYPE_KEY and DATA_KEY entries are both defined.
 */
function isValidDocJson(docJson) {
    // `typeof null` is "object", so null has to be ruled out explicitly.
    if (typeof docJson !== "object" || docJson === null) {
        return false;
    }
    const hasType = docJson[TYPE_KEY] !== undefined;
    return hasType && docJson[DATA_KEY] !== undefined;
}
/**
 * Wraps a document in the envelope that is written to the key-value store.
 *
 * @param {object} doc - Object exposing `toJSON()` and a `type` property.
 * @param {object} serializer - Object exposing `toPersistence(data)`.
 * @returns {object} Envelope with the serialized `doc.toJSON()` result under
 *   DATA_KEY and `doc.type` under TYPE_KEY.
 */
function docToJson(doc, serializer) {
    const payload = serializer.toPersistence(doc.toJSON());
    const envelope = {};
    envelope[DATA_KEY] = payload;
    envelope[TYPE_KEY] = doc.type;
    return envelope;
}
/**
 * Rebuilds a node object from a persisted document envelope.
 *
 * The payload under DATA_KEY is decoded with `serializer.fromPersistence`
 * before the type tag is inspected, so a decoding failure surfaces even for
 * envelopes whose type is unknown.
 *
 * @param {object} docDict - Envelope holding TYPE_KEY and DATA_KEY entries.
 * @param {object} serializer - Object exposing `fromPersistence(data)`.
 * @returns {object} A `Document` for the DOCUMENT type tag, a `TextNode` for
 *   the TEXT type tag.
 * @throws {Error} When the type tag is neither DOCUMENT nor TEXT.
 */
function jsonToDoc(docDict, serializer) {
    const kind = docDict[TYPE_KEY];
    // FIXME: the decoded payload is not schema-validated (zod check pending).
    const fields = serializer.fromPersistence(docDict[DATA_KEY]);
    switch (kind) {
        case index_cjs$1.ObjectType.DOCUMENT:
            // Documents are restored with their embedding but without relationships.
            return new index_cjs$1.Document({
                text: fields.text,
                id_: fields.id_,
                embedding: fields.embedding,
                hash: fields.hash,
                metadata: fields.metadata
            });
        case index_cjs$1.ObjectType.TEXT:
            // Text nodes are restored with their relationships but without an embedding.
            return new index_cjs$1.TextNode({
                text: fields.text,
                id_: fields.id_,
                hash: fields.hash,
                metadata: fields.metadata,
                relationships: fields.relationships
            });
        default:
            throw new Error(`Unknown doc type: ${kind}`);
    }
}
// Default value of the `persistPath` parameter of BaseDocumentStore.persist:
// DEFAULT_PERSIST_DIR joined with DEFAULT_DOC_STORE_PERSIST_FILENAME.
const DEFAULT_PERSIST_PATH = env.path.join(index_cjs.DEFAULT_PERSIST_DIR, index_cjs.DEFAULT_DOC_STORE_PERSIST_FILENAME);
/**
 * Base class for document stores.
 *
 * It supplies node-oriented helpers on top of a `getDocument(id, raiseError)`
 * method that this class calls but does not define; a subclass has to
 * provide it.
 */
class BaseDocumentStore {
    constructor() {
        // Serializer used when documents are written to / read from storage.
        this.serializer = jsonSerializer;
    }

    /**
     * Persist the docstore to a file. The base implementation does nothing.
     *
     * @param {string} [persistPath=DEFAULT_PERSIST_PATH] - Target location.
     */
    persist(persistPath = DEFAULT_PERSIST_PATH) {
        // Intentionally empty in the base class.
    }

    /**
     * Looks up several nodes at once.
     *
     * @param {string[]} nodeIds - Ids to resolve.
     * @param {boolean} [raiseError=true] - Forwarded to `getNode`.
     * @returns {Promise<object[]>} Nodes in the same order as `nodeIds`;
     *   rejects as soon as any single lookup rejects.
     */
    getNodes(nodeIds, raiseError = true) {
        const lookups = nodeIds.map((id) => this.getNode(id, raiseError));
        return Promise.all(lookups);
    }

    /**
     * Looks up one node.
     *
     * @param {string} nodeId - Id to resolve.
     * @param {boolean} [raiseError=true] - Forwarded to `getDocument`.
     * @returns {Promise<object>} The stored `BaseNode` instance.
     * @throws {Error} When `getDocument` yields anything that is not a
     *   `BaseNode` instance - this includes `undefined`, so the call throws
     *   even if `raiseError` is false and the lookup came back empty.
     */
    async getNode(nodeId, raiseError = true) {
        const candidate = await this.getDocument(nodeId, raiseError);
        if (candidate instanceof index_cjs$1.BaseNode) {
            return candidate;
        }
        throw new Error(`Document ${nodeId} is not a Node.`);
    }

    /**
     * Resolves every value of a key -> node id mapping.
     *
     * @param {Object<string, string>} nodeIdDict - Mapping whose values are node ids.
     * @returns {Promise<Object<string, object>>} Mapping with the same keys
     *   and the resolved nodes as values. Lookups run one after another.
     */
    async getNodeDict(nodeIdDict) {
        const nodesByKey = {};
        for (const key in nodeIdDict) {
            const nodeId = nodeIdDict[key];
            nodesByKey[key] = await this.getNode(nodeId);
        }
        return nodesByKey;
    }
}
/**
 * Document store backed by a key-value store.
 *
 * Three collections are used, all prefixed with the namespace:
 *  - `${namespace}/data`         document envelopes, keyed by document id
 *  - `${namespace}/ref_doc_info` `{ nodeIds, extraInfo }`, keyed by the id of
 *                                the source document the nodes were derived from
 *  - `${namespace}/metadata`     `{ docHash, refDocId? }`, keyed by document id
 *
 * The key-value store is expected to expose async `get(key, collection)`,
 * `getAll(collection)`, `put(key, value, collection)` and
 * `delete(key, collection)`.
 */
class KVDocumentStore extends BaseDocumentStore {
    /**
     * @param {object} kvstore - Key-value store used for all persistence.
     * @param {string} [namespace=index_cjs.DEFAULT_NAMESPACE] - Prefix of the collection names.
     */
    constructor(kvstore, namespace = index_cjs.DEFAULT_NAMESPACE){
        super();
        this.kvstore = kvstore;
        this.nodeCollection = `${namespace}/data`;
        this.refDocCollection = `${namespace}/ref_doc_info`;
        this.metadataCollection = `${namespace}/metadata`;
    }

    /**
     * Loads every stored document.
     *
     * @returns {Promise<Object<string, object>>} Documents keyed by id. Entries
     *   that are not valid envelopes are skipped with a console warning.
     */
    async docs() {
        const jsonDict = await this.kvstore.getAll(this.nodeCollection);
        const docs = {};
        for(const key in jsonDict){
            const value = jsonDict[key];
            if (isValidDocJson(value)) {
                docs[key] = jsonToDoc(value, this.serializer);
            } else {
                console.warn(`Invalid JSON for docId ${key}`);
            }
        }
        return docs;
    }

    /**
     * Stores documents together with their hash metadata and, for text nodes
     * that have a source node, the back-reference from the source document.
     *
     * @param {object[]} docs - Documents to store.
     * @param {boolean} [allowUpdate=true] - When false, storing an id that
     *   already exists throws instead of overwriting.
     * @throws {Error} When a document has no id, or when it already exists
     *   and `allowUpdate` is false.
     */
    async addDocuments(docs, allowUpdate = true) {
        for (const doc of docs){
            if (this.isNil(doc.id_)) {
                throw new Error("doc_id not set");
            }
            if (!allowUpdate && await this.documentExists(doc.id_)) {
                throw new Error(`doc_id ${doc.id_} already exists. Set allow_update to True to overwrite.`);
            }
            const nodeKey = doc.id_;
            const data = docToJson(doc, this.serializer);
            await this.kvstore.put(nodeKey, data, this.nodeCollection);
            const metadata = {
                docHash: doc.hash
            };
            if (doc.type === index_cjs$1.ObjectType.TEXT && doc.sourceNode !== undefined) {
                const refDocId = doc.sourceNode.nodeId;
                const refDocInfo = await this.getRefDocInfo(refDocId) ?? {
                    nodeIds: [],
                    extraInfo: {}
                };
                // Re-adding a node (allowUpdate) must not list its id twice.
                if (!refDocInfo.nodeIds.includes(doc.id_)) {
                    refDocInfo.nodeIds.push(doc.id_);
                }
                // Guarantee the field exists even for entries stored without it.
                if (this.isNil(refDocInfo.extraInfo)) {
                    refDocInfo.extraInfo = {};
                }
                await this.kvstore.put(refDocId, refDocInfo, this.refDocCollection);
                metadata.refDocId = refDocId;
            }
            await this.kvstore.put(nodeKey, metadata, this.metadataCollection);
        }
    }

    /**
     * Reads one document.
     *
     * @param {string} docId - Id of the document.
     * @param {boolean} [raiseError=true] - Throw when the id is unknown.
     * @returns {Promise<object|undefined>} The document, or `undefined` when
     *   it is missing and `raiseError` is false.
     * @throws {Error} When the id is unknown (and `raiseError` is true) or
     *   the stored value is not a valid envelope.
     */
    async getDocument(docId, raiseError = true) {
        const json = await this.kvstore.get(docId, this.nodeCollection);
        if (this.isNil(json)) {
            if (raiseError) {
                throw new Error(`docId ${docId} not found.`);
            } else {
                return;
            }
        }
        if (!isValidDocJson(json)) {
            throw new Error(`Invalid JSON for docId ${docId}`);
        }
        return jsonToDoc(json, this.serializer);
    }

    /**
     * Reads the reference info of a source document.
     *
     * @param {string} refDocId - Id of the source document.
     * @returns {Promise<object|undefined>} A deep copy of the stored info, so
     *   callers can mutate it freely, or `undefined` when nothing is stored.
     */
    async getRefDocInfo(refDocId) {
        const refDocInfo = await this.kvstore.get(refDocId, this.refDocCollection);
        return refDocInfo ? structuredClone(refDocInfo) : undefined;
    }

    /**
     * @returns {Promise<object|undefined>} Everything in the reference-info
     *   collection, or `undefined` when the store reports nothing.
     */
    async getAllRefDocInfo() {
        const refDocInfos = await this.kvstore.getAll(this.refDocCollection);
        if (this.isNil(refDocInfos)) {
            return;
        }
        return refDocInfos;
    }

    /** @returns {Promise<boolean>} Whether reference info exists for `refDocId`. */
    async refDocExists(refDocId) {
        return !this.isNil(await this.getRefDocInfo(refDocId));
    }

    /** @returns {Promise<boolean>} Whether a document is stored under `docId`. */
    async documentExists(docId) {
        return !this.isNil(await this.kvstore.get(docId, this.nodeCollection));
    }

    /**
     * Detaches a node from the source document it references.
     *
     * The node id is removed from the source document's `nodeIds`; when no
     * ids remain, the source document's reference info is deleted. The
     * source document's metadata entry is deleted in either case.
     *
     * @param {string} docId - Id of the node being removed.
     */
    async removeRefDocNode(docId) {
        const metadata = await this.kvstore.get(docId, this.metadataCollection);
        // Stores may answer a miss with null or undefined; accept both.
        if (this.isNil(metadata)) {
            return;
        }
        const refDocId = metadata.refDocId;
        if (this.isNil(refDocId)) {
            return;
        }
        const refDocInfo = await this.kvstore.get(refDocId, this.refDocCollection);
        if (this.isNil(refDocInfo)) {
            return;
        }
        // Build a new list instead of mutating the object handed out by the store.
        const remainingIds = (refDocInfo.nodeIds ?? []).filter((id)=>id !== docId);
        if (remainingIds.length > 0) {
            await this.kvstore.put(refDocId, {
                ...refDocInfo,
                nodeIds: remainingIds
            }, this.refDocCollection);
        } else {
            // No node references the source document any more.
            await this.kvstore.delete(refDocId, this.refDocCollection);
        }
        await this.kvstore.delete(refDocId, this.metadataCollection);
    }

    /**
     * Deletes a document and its metadata.
     *
     * @param {string} docId - Id of the document.
     * @param {boolean} [raiseError=true] - Throw when nothing was deleted.
     * @param {boolean} [removeRefDocNode=true] - Also detach the document
     *   from its source document's reference info.
     * @throws {Error} When the store reports that the document did not exist
     *   and `raiseError` is true.
     */
    async deleteDocument(docId, raiseError = true, removeRefDocNode = true) {
        if (removeRefDocNode) {
            await this.removeRefDocNode(docId);
        }
        const deleteSuccess = await this.kvstore.delete(docId, this.nodeCollection);
        await this.kvstore.delete(docId, this.metadataCollection);
        if (!deleteSuccess && raiseError) {
            throw new Error(`doc_id ${docId} not found.`);
        }
    }

    /**
     * Deletes a source document's reference info, its metadata and every
     * node listed in its `nodeIds`.
     *
     * @param {string} refDocId - Id of the source document.
     * @param {boolean} [raiseError=true] - Throw when no reference info exists.
     * @throws {Error} When no reference info exists and `raiseError` is true.
     */
    async deleteRefDoc(refDocId, raiseError = true) {
        const refDocInfo = await this.getRefDocInfo(refDocId);
        if (this.isNil(refDocInfo)) {
            if (raiseError) {
                throw new Error(`ref_doc_id ${refDocId} not found.`);
            } else {
                return;
            }
        }
        for (const docId of refDocInfo.nodeIds){
            // The whole entry is dropped below, so skip per-node detaching.
            await this.deleteDocument(docId, false, false);
        }
        await this.kvstore.delete(refDocId, this.metadataCollection);
        await this.kvstore.delete(refDocId, this.refDocCollection);
    }

    /**
     * Replaces the metadata entry of `docId` with `{ docHash }`.
     *
     * @param {string} docId - Id of the document.
     * @param {string} docHash - Hash to record.
     */
    async setDocumentHash(docId, docHash) {
        const metadata = {
            docHash: docHash
        };
        await this.kvstore.put(docId, metadata, this.metadataCollection);
    }

    /** @returns {Promise<string|undefined>} The recorded hash of `docId`, if any. */
    async getDocumentHash(docId) {
        const metadata = await this.kvstore.get(docId, this.metadataCollection);
        return metadata?.docHash;
    }

    /**
     * @returns {Promise<Object<string, string>>} Mapping from document hash to
     *   document id for every metadata entry that carries a truthy hash.
     */
    async getAllDocumentHashes() {
        const hashes = {};
        const metadataDocs = await this.kvstore.getAll(this.metadataCollection);
        // The entries are already in hand; no per-id read is needed.
        for (const [docId, metadata] of Object.entries(metadataDocs ?? {})){
            const hash = metadata?.docHash;
            if (hash) {
                hashes[hash] = docId;
            }
        }
        return hashes;
    }

    /** @returns {boolean} Whether `value` is `null` or `undefined`. */
    isNil(value) {
        return value === null || value === undefined;
    }
}
// Public CommonJS exports: the two store classes, the envelope helpers
// (docToJson / jsonToDoc / isValidDocJson) and the two serializers.
exports.BaseDocumentStore = BaseDocumentStore;
exports.KVDocumentStore = KVDocumentStore;
exports.docToJson = docToJson;
exports.isValidDocJson = isValidDocJson;
exports.jsonSerializer = jsonSerializer;
exports.jsonToDoc = jsonToDoc;
exports.noneSerializer = noneSerializer;