UNPKG

@llamaindex/core

Version:
248 lines (243 loc) 8.9 kB
Object.defineProperty(exports, '__esModule', { value: true });

var env = require('@llamaindex/env');
var index_cjs = require('../../../global/dist/index.cjs');
var index_cjs$1 = require('../../../schema/dist/index.cjs');

// Keys of the persisted envelope produced by docToJson / consumed by jsonToDoc.
const TYPE_KEY = "__type__";
const DATA_KEY = "__data__";

/** Serializer that stores the document payload as a JSON string. */
const jsonSerializer = {
    toPersistence (data) {
        return JSON.stringify(data);
    },
    fromPersistence (data) {
        return JSON.parse(data);
    }
};

/** Serializer that stores the document payload unchanged. */
const noneSerializer = {
    toPersistence (data) {
        return data;
    },
    fromPersistence (data) {
        return data;
    }
};

/**
 * Checks whether a value looks like a persisted document envelope.
 *
 * @param {*} docJson - Value read back from the key-value store.
 * @returns {boolean} True when `docJson` is a non-null object on which both
 *   the type key and the data key are defined.
 */
function isValidDocJson(docJson) {
    return typeof docJson === "object" && docJson !== null && docJson[TYPE_KEY] !== undefined && docJson[DATA_KEY] !== undefined;
}

/**
 * Wraps a document into the persisted envelope.
 *
 * @param {object} doc - Object exposing `toJSON()` and `type`.
 * @param {{toPersistence: Function}} serializer - Serializer applied to `doc.toJSON()`.
 * @returns {object} Envelope with the serialized payload and the document type.
 */
function docToJson(doc, serializer) {
    return {
        [DATA_KEY]: serializer.toPersistence(doc.toJSON()),
        [TYPE_KEY]: doc.type
    };
}

/**
 * Rebuilds a `Document` or `TextNode` from a persisted envelope.
 *
 * @param {object} docDict - Envelope as produced by `docToJson`.
 * @param {{fromPersistence: Function}} serializer - Serializer used to decode the payload.
 * @returns {object} A `Document` for `ObjectType.DOCUMENT`, a `TextNode` for `ObjectType.TEXT`.
 * @throws {Error} When the stored type is neither of the two.
 */
function jsonToDoc(docDict, serializer) {
    const docType = docDict[TYPE_KEY];
    // fixme: zod type check this
    const dataDict = serializer.fromPersistence(docDict[DATA_KEY]);
    let doc;
    if (docType === index_cjs$1.ObjectType.DOCUMENT) {
        doc = new index_cjs$1.Document({
            text: dataDict.text,
            id_: dataDict.id_,
            embedding: dataDict.embedding,
            hash: dataDict.hash,
            metadata: dataDict.metadata
        });
    } else if (docType === index_cjs$1.ObjectType.TEXT) {
        // Only these fields are restored for text nodes; `embedding` is not passed on.
        doc = new index_cjs$1.TextNode({
            text: dataDict.text,
            id_: dataDict.id_,
            hash: dataDict.hash,
            metadata: dataDict.metadata,
            relationships: dataDict.relationships
        });
    } else {
        throw new Error(`Unknown doc type: ${docType}`);
    }
    return doc;
}

const DEFAULT_PERSIST_PATH = env.path.join(index_cjs.DEFAULT_PERSIST_DIR, index_cjs.DEFAULT_DOC_STORE_PERSIST_FILENAME);

/**
 * Base class for document stores. It provides the node-oriented helpers on top
 * of a `getDocument(docId, raiseError)` method that this class calls but does
 * not define itself.
 */
class BaseDocumentStore {
    // Save/load
    /**
     * Persist the docstore to a file. This base implementation does nothing.
     *
     * @param {string} [persistPath=DEFAULT_PERSIST_PATH] - Target path (unused here).
     */
    persist(persistPath = DEFAULT_PERSIST_PATH) {
        // Persist the docstore to a file.
    }

    // Nodes
    /**
     * Fetches several nodes concurrently.
     *
     * @param {string[]} nodeIds - Ids to look up.
     * @param {boolean} [raiseError=true] - Forwarded to `getNode`.
     * @returns {Promise<object[]>} Nodes in the same order as `nodeIds`.
     */
    getNodes(nodeIds, raiseError = true) {
        return Promise.all(nodeIds.map((nodeId)=>this.getNode(nodeId, raiseError)));
    }

    /**
     * Fetches one document and verifies that it is a `BaseNode`.
     *
     * @param {string} nodeId - Id to look up.
     * @param {boolean} [raiseError=true] - Forwarded to `getDocument`.
     * @returns {Promise<object>} The node.
     * @throws {Error} When the result is not a `BaseNode` instance. This includes
     *   an `undefined` result from `getDocument`.
     */
    async getNode(nodeId, raiseError = true) {
        const doc = await this.getDocument(nodeId, raiseError);
        if (!(doc instanceof index_cjs$1.BaseNode)) {
            throw new Error(`Document ${nodeId} is not a Node.`);
        }
        return doc;
    }

    /**
     * Resolves a dictionary of node ids into a dictionary of nodes, one lookup at a time.
     *
     * @param {Object<string, string>} nodeIdDict - Maps arbitrary keys to node ids.
     * @returns {Promise<Object<string, object>>} Same keys, mapped to the fetched nodes.
     */
    async getNodeDict(nodeIdDict) {
        const result = {};
        for(const index in nodeIdDict){
            result[index] = await this.getNode(nodeIdDict[index]);
        }
        return result;
    }

    constructor(){
        // Default payload serializer; instances may replace it (e.g. with noneSerializer).
        this.serializer = jsonSerializer;
    }
}

/**
 * Document store backed by a key-value store. It uses three collections derived
 * from the namespace: `<namespace>/data`, `<namespace>/ref_doc_info` and
 * `<namespace>/metadata`.
 */
class KVDocumentStore extends BaseDocumentStore {
    /**
     * @param {object} kvstore - Store exposing `get`, `getAll`, `put` and `delete`,
     *   each taking a collection name as used below.
     * @param {string} [namespace=DEFAULT_NAMESPACE] - Prefix for the collection names.
     */
    constructor(kvstore, namespace = index_cjs.DEFAULT_NAMESPACE){
        super();
        this.kvstore = kvstore;
        this.nodeCollection = `${namespace}/data`;
        this.refDocCollection = `${namespace}/ref_doc_info`;
        this.metadataCollection = `${namespace}/metadata`;
    }

    /**
     * Loads every document in the node collection.
     *
     * @returns {Promise<Object<string, object>>} Documents keyed by their store key.
     *   Entries that fail `isValidDocJson` are skipped with a console warning.
     */
    async docs() {
        const jsonDict = await this.kvstore.getAll(this.nodeCollection);
        const docs = {};
        for(const key in jsonDict){
            const value = jsonDict[key];
            if (isValidDocJson(value)) {
                docs[key] = jsonToDoc(value, this.serializer);
            } else {
                console.warn(`Invalid JSON for docId ${key}`);
            }
        }
        return docs;
    }

    /**
     * Stores documents together with their hash metadata. For text nodes that
     * have a source node, it also records the node under that source document.
     *
     * @param {object[]} docs - Documents or nodes to store.
     * @param {boolean} [allowUpdate=true] - When false, an existing id is an error.
     * @returns {Promise<void>}
     * @throws {Error} When a document has no id, or when it already exists and
     *   `allowUpdate` is false.
     */
    async addDocuments(docs, allowUpdate = true) {
        for (const doc of docs){
            // Reject undefined as well as null: the id is used as the storage key.
            if (this.isNil(doc.id_)) {
                throw new Error("doc_id not set");
            }
            if (!allowUpdate && await this.documentExists(doc.id_)) {
                throw new Error(`doc_id ${doc.id_} already exists. Set allow_update to True to overwrite.`);
            }
            const nodeKey = doc.id_;
            const data = docToJson(doc, this.serializer);
            await this.kvstore.put(nodeKey, data, this.nodeCollection);
            const metadata = {
                docHash: doc.hash
            };
            if (doc.type === index_cjs$1.ObjectType.TEXT && doc.sourceNode !== undefined) {
                const refDocId = doc.sourceNode.nodeId;
                const refDocInfo = await this.getRefDocInfo(refDocId) || {
                    nodeIds: [],
                    extraInfo: {}
                };
                // Re-adding an existing node must not list its id twice.
                if (!refDocInfo.nodeIds.includes(doc.id_)) {
                    refDocInfo.nodeIds.push(doc.id_);
                }
                if (this.isNil(refDocInfo.extraInfo)) {
                    refDocInfo.extraInfo = {};
                }
                await this.kvstore.put(refDocId, refDocInfo, this.refDocCollection);
                metadata.refDocId = refDocId;
            }
            await this.kvstore.put(nodeKey, metadata, this.metadataCollection);
        }
    }

    /**
     * Loads a single document.
     *
     * @param {string} docId - Id to look up.
     * @param {boolean} [raiseError=true] - Throw instead of returning undefined
     *   when the id is missing.
     * @returns {Promise<object|undefined>} The document, or undefined when it is
     *   missing and `raiseError` is false.
     * @throws {Error} When the id is missing and `raiseError` is true, or when the
     *   stored value is not a valid envelope.
     */
    async getDocument(docId, raiseError = true) {
        const json = await this.kvstore.get(docId, this.nodeCollection);
        if (this.isNil(json)) {
            if (raiseError) {
                throw new Error(`docId ${docId} not found.`);
            } else {
                return;
            }
        }
        if (!isValidDocJson(json)) {
            throw new Error(`Invalid JSON for docId ${docId}`);
        }
        return jsonToDoc(json, this.serializer);
    }

    /**
     * Returns a deep copy of the ref-doc record so callers can mutate it freely.
     *
     * @param {string} refDocId - Source document id.
     * @returns {Promise<object|undefined>} The copied record, or undefined when
     *   the stored value is falsy.
     */
    async getRefDocInfo(refDocId) {
        const refDocInfo = await this.kvstore.get(refDocId, this.refDocCollection);
        return refDocInfo ? structuredClone(refDocInfo) : undefined;
    }

    /**
     * @returns {Promise<object|undefined>} Everything in the ref-doc collection,
     *   or undefined when the store returns null/undefined.
     */
    async getAllRefDocInfo() {
        const refDocInfos = await this.kvstore.getAll(this.refDocCollection);
        if (this.isNil(refDocInfos)) {
            return;
        }
        return refDocInfos;
    }

    /**
     * @param {string} refDocId - Source document id.
     * @returns {Promise<boolean>} Whether a ref-doc record exists for the id.
     */
    async refDocExists(refDocId) {
        return !this.isNil(await this.getRefDocInfo(refDocId));
    }

    /**
     * @param {string} docId - Document id.
     * @returns {Promise<boolean>} Whether the node collection holds a value for the id.
     */
    async documentExists(docId) {
        return !this.isNil(await this.kvstore.get(docId, this.nodeCollection));
    }

    /**
     * Detaches a node from the ref-doc record of its source document.
     *
     * The node id is removed from the record's `nodeIds`. When other nodes remain,
     * the updated record is written back; otherwise the record is deleted. Nothing
     * happens when the node has no metadata, no `refDocId`, or no ref-doc record.
     *
     * @param {string} docId - Id of the node being removed.
     * @returns {Promise<void>}
     */
    async removeRefDocNode(docId) {
        const metadata = await this.kvstore.get(docId, this.metadataCollection);
        // A store may report "missing" as undefined as well as null.
        if (this.isNil(metadata)) {
            return;
        }
        const refDocId = metadata.refDocId;
        if (this.isNil(refDocId)) {
            return;
        }
        const refDocInfo = await this.kvstore.get(refDocId, this.refDocCollection);
        if (this.isNil(refDocInfo)) {
            return;
        }
        // Build a new record rather than mutating the object owned by the store.
        const remainingNodeIds = (refDocInfo.nodeIds ?? []).filter((nodeId)=>nodeId !== docId);
        if (remainingNodeIds.length > 0) {
            await this.kvstore.put(refDocId, {
                ...refDocInfo,
                nodeIds: remainingNodeIds
            }, this.refDocCollection);
        } else {
            // No nodes left: drop the record instead of keeping a stale entry.
            await this.kvstore.delete(refDocId, this.refDocCollection);
        }
        // Kept from the previous implementation: the source document's own
        // metadata entry is removed whenever one of its nodes is detached.
        await this.kvstore.delete(refDocId, this.metadataCollection);
    }

    /**
     * Deletes a document and its metadata.
     *
     * @param {string} docId - Id to delete.
     * @param {boolean} [raiseError=true] - Throw when the node collection reports
     *   that nothing was deleted.
     * @param {boolean} [removeRefDocNode=true] - Also detach the node from its
     *   source document's ref-doc record.
     * @returns {Promise<void>}
     * @throws {Error} When the delete reports failure and `raiseError` is true.
     */
    async deleteDocument(docId, raiseError = true, removeRefDocNode = true) {
        if (removeRefDocNode) {
            await this.removeRefDocNode(docId);
        }
        const deleteSuccess = await this.kvstore.delete(docId, this.nodeCollection);
        await this.kvstore.delete(docId, this.metadataCollection);
        if (!deleteSuccess && raiseError) {
            throw new Error(`doc_id ${docId} not found.`);
        }
    }

    /**
     * Deletes a source document's record, its metadata and all nodes listed under it.
     *
     * @param {string} refDocId - Source document id.
     * @param {boolean} [raiseError=true] - Throw when no ref-doc record exists.
     * @returns {Promise<void>}
     * @throws {Error} When the record is missing and `raiseError` is true.
     */
    async deleteRefDoc(refDocId, raiseError = true) {
        const refDocInfo = await this.getRefDocInfo(refDocId);
        if (this.isNil(refDocInfo)) {
            if (raiseError) {
                throw new Error(`ref_doc_id ${refDocId} not found.`);
            } else {
                return;
            }
        }
        for (const docId of refDocInfo.nodeIds){
            // The whole record is deleted below, so skip per-node detaching.
            await this.deleteDocument(docId, false, false);
        }
        await this.kvstore.delete(refDocId, this.metadataCollection);
        await this.kvstore.delete(refDocId, this.refDocCollection);
    }

    /**
     * Writes `{ docHash }` as the metadata entry for a document, replacing any
     * existing entry.
     *
     * @param {string} docId - Document id.
     * @param {string} docHash - Hash to store.
     * @returns {Promise<void>}
     */
    async setDocumentHash(docId, docHash) {
        const metadata = {
            docHash: docHash
        };
        await this.kvstore.put(docId, metadata, this.metadataCollection);
    }

    /**
     * @param {string} docId - Document id.
     * @returns {Promise<string|undefined>} The stored hash, or undefined when
     *   there is no metadata entry.
     */
    async getDocumentHash(docId) {
        const metadata = await this.kvstore.get(docId, this.metadataCollection);
        return metadata?.docHash;
    }

    /**
     * Builds a reverse index of the metadata collection.
     *
     * @returns {Promise<Object<string, string>>} Maps each truthy hash to a
     *   document id that carries it.
     */
    async getAllDocumentHashes() {
        const hashes = {};
        const metadataDocs = await this.kvstore.getAll(this.metadataCollection);
        for(const docId in metadataDocs){
            const hash = await this.getDocumentHash(docId);
            if (hash) {
                hashes[hash] = docId;
            }
        }
        return hashes;
    }

    /**
     * @param {*} value - Any value.
     * @returns {boolean} True for null or undefined.
     */
    isNil(value) {
        return value === null || value === undefined;
    }
}

exports.BaseDocumentStore = BaseDocumentStore;
exports.KVDocumentStore = KVDocumentStore;
exports.docToJson = docToJson;
exports.isValidDocJson = isValidDocJson;
exports.jsonSerializer = jsonSerializer;
exports.jsonToDoc = jsonToDoc;
exports.noneSerializer = noneSerializer;