UNPKG

closevector-web

Version:

CloseVector is fundamentally a vector database. We have made dedicated libraries available for both browsers and Node.js, aiming for easy integration no matter your platform. One feature we've been working on is its potential for scalability. Instead of b…

243 lines (242 loc) 10.4 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.HNSWLib = exports.CloseVectorHNSWWeb = exports.SynchronousInMemoryDocstore = exports.CloseVectorSaveableVectorStore = void 0;
const closevector_hnswlib_wasm_1 = require("closevector-hnswlib-wasm");
const closevector_common_1 = require("closevector-common");
const lib_1 = require("./lib");
const loader_1 = require("./loader");
var closevector_common_2 = require("closevector-common");
Object.defineProperty(exports, "CloseVectorSaveableVectorStore", { enumerable: true, get: function () { return closevector_common_2.CloseVectorSaveableVectorStore; } });
Object.defineProperty(exports, "SynchronousInMemoryDocstore", { enumerable: true, get: function () { return closevector_common_2.SynchronousInMemoryDocstore; } });

// Module-level cache for the loaded hnswlib wasm library; filled by `CloseVectorHNSWWeb.imports()`.
let __lib = undefined;

/**
 * Vector store backed by an HNSW index from `closevector-hnswlib-wasm`.
 *
 * Documents live in `docstore`, keyed by the numeric label under which the
 * matching vector was added to the index. The instance can be persisted via
 * `save`/`load` and exchanged with the CloseVector cloud service via
 * `saveToCloud`/`loadFromCloud`.
 */
class CloseVectorHNSWWeb extends closevector_common_1.CloseVectorSaveableVectorStore {
    _index;
    docstore;
    args;
    _uuid;

    /** @returns {string} The fixed store type tag, `'hnswlib'`. */
    _vectorstoreType() {
        return 'hnswlib';
    }

    /**
     * @param embeddings Embeddings provider; `addDocuments` calls its `embedDocuments`.
     * @param args Store configuration. Reads `credentials`, `index` and `docstore` here;
     *   `space`, `numDimensions` and `maxElements` are read elsewhere in this class.
     *   The object is kept by reference and mutated by `initIndex`.
     */
    constructor(embeddings, args) {
        super(embeddings, args.credentials);
        this._index = args.index;
        this.args = args;
        this.embeddings = embeddings;
        this.docstore = args?.docstore ?? new closevector_common_1.SynchronousInMemoryDocstore();
    }

    /**
     * Embeds the `pageContent` of every document and adds the vectors to the store.
     *
     * @param documents Array of objects with a `pageContent` property.
     * @returns Whatever `addVectors` resolves to.
     */
    async addDocuments(documents) {
        const texts = documents.map(({ pageContent }) => pageContent);
        return this.addVectors(await this.embeddings.embedDocuments(texts), documents);
    }

    /**
     * Creates a new (not yet initialised) `HierarchicalNSW` instance.
     *
     * @param args Must provide `space` and `numDimensions`.
     * @throws {Error} If `space` is falsy or `numDimensions` is `undefined`.
     */
    static async getHierarchicalNSW(args) {
        const { HierarchicalNSW } = await CloseVectorHNSWWeb.imports();
        if (!args.space) {
            throw new Error('closevector-hnswlib-wasm requires a space argument');
        }
        if (args.numDimensions === undefined) {
            throw new Error('closevector-hnswlib-wasm requires a numDimensions argument');
        }
        // NOTE(review): the meaning of the empty third argument is defined by
        // closevector-hnswlib-wasm and is not visible here — confirm against its API.
        return new HierarchicalNSW(args.space, args.numDimensions, '');
    }

    /**
     * Lazily creates the index on first use; does nothing when one already exists.
     *
     * When `args.numDimensions` is not set, it is taken from the first vector and
     * `args.maxElements` is set to the number of vectors supplied.
     *
     * @param vectors Vectors used to size the new index.
     */
    async initIndex(vectors) {
        if (this._index) {
            return;
        }
        if (this.args.numDimensions === undefined) {
            this.args.numDimensions = vectors[0].length;
            this.args.maxElements = vectors.length;
        }
        this.index = await CloseVectorHNSWWeb.getHierarchicalNSW(this.args);
        // NOTE(review): 48 / 200 / 100 and the efSearch of 32 are tuning values passed
        // straight to the wasm binding; their exact meaning is defined there — confirm.
        this.index.initIndex(vectors.length, 48, 200, 100);
        this.index.setEfSearch(32);
    }

    /**
     * The underlying HNSW index.
     *
     * @throws {Error} If no index has been created or supplied yet.
     */
    get index() {
        if (!this._index) {
            throw new Error('Vector store not initialised yet. Try calling `addTexts` first.');
        }
        return this._index;
    }

    set index(index) {
        this._index = index;
    }

    /** Identifier assigned by `saveToCloud` / `loadFromCloud`, or set by the caller. */
    get uuid() {
        return this._uuid;
    }

    set uuid(uuid) {
        this._uuid = uuid;
    }

    /**
     * Adds vectors and their documents to the index and docstore.
     *
     * Labels continue from the index's current element count, so the i-th new
     * vector and document share the label `currentCount + i`.
     *
     * @param vectors Array of numeric vectors; an empty array is a no-op.
     * @param documents Documents, one per vector.
     * @throws {Error} If the two arrays differ in length, or any vector's length
     *   differs from `args.numDimensions`.
     */
    async addVectors(vectors, documents) {
        if (vectors.length === 0) {
            return;
        }
        // Validate the pairing before creating the index, so a bad call leaves no
        // half-initialised state behind.
        if (vectors.length !== documents.length) {
            throw new Error(`Vectors and metadatas must have the same length`);
        }
        await this.initIndex(vectors);
        // TODO here we could optionally normalise the vectors to unit length
        // so that dot product is equivalent to cosine similarity, like this
        // https://github.com/nmslib/hnswlib/issues/384#issuecomment-1155737730
        // While we only support OpenAI embeddings this isn't necessary
        const { numDimensions } = this.args;
        // Check every vector, not just the first, so a ragged batch fails up front
        // instead of part-way through insertion.
        if (vectors.some((vector) => vector.length !== numDimensions)) {
            throw new Error(`Vectors must have the same length as the number of dimensions (${numDimensions})`);
        }
        const currentCount = this.index.getCurrentCount();
        const needed = currentCount + vectors.length;
        if (needed > this.index.getMaxElements()) {
            this.index.resizeIndex(needed);
        }
        const toSave = {};
        vectors.forEach((vector, offset) => {
            const label = currentCount + offset;
            this.index.addPoint(vector, label, false);
            toSave[label] = documents[offset];
        });
        this.docstore.add(toSave);
    }

    /**
     * Runs a k-nearest-neighbour search for `query`.
     *
     * @param query Query vector; its length must equal `args.numDimensions`.
     * @param k Number of neighbours requested; clamped (with a warning) to the
     *   number of elements in the index.
     * @param filter Optional predicate applied to the stored document of each candidate.
     * @returns Array of `[document, distance]` pairs.
     * @throws {Error} If the query length does not match, or no index is available.
     */
    async similaritySearchVectorWithScore(query, k, filter) {
        if (this.args.numDimensions && !this._index) {
            await this.initIndex([[]]);
        }
        if (query.length !== this.args.numDimensions) {
            throw new Error(`Query vector must have the same length as the number of dimensions (${this.args.numDimensions})`);
        }
        const total = this.index.getCurrentCount();
        let limit = k;
        if (k > total) {
            console.warn(`k (${k}) is greater than the number of elements in the index (${total}), setting k to ${total}`);
            limit = total;
        }
        // Candidates whose docstore lookup yields a string are rejected.
        // NOTE(review): presumably a string result signals "not found" — confirm
        // against the docstore implementation.
        const filterFunction = (label) => {
            if (!filter) {
                return true;
            }
            const document = this.docstore.search(String(label));
            if (typeof document !== 'string') {
                return filter(document);
            }
            return false;
        };
        const result = this.index.searchKnn(query, limit, filter ? filterFunction : undefined);
        return result.neighbors.map((docIndex, resultIndex) => [
            this.docstore.search(String(docIndex)),
            result.distances[resultIndex],
        ]);
    }

    /**
     * Saves the store locally under `<uuid>.hnsw` and uploads it.
     *
     * @param options Reads `credentials` (falls back to `this.credentials`), `uuid`,
     *   `description` (defaults to the current ISO timestamp), `public` and `onProgress`.
     * @returns The value resolved by the uploader.
     * @throws {Error} If no credentials are available.
     */
    async saveToCloud(options) {
        const _credentials = options.credentials || this.credentials;
        if (!_credentials) {
            throw new Error('You must provide credentials');
        }
        if (options.uuid) {
            this.uuid = options.uuid;
        }
        const urlResp = await (0, lib_1.createUploadFileOperationUrl)({
            uuid: options.uuid ?? this.uuid,
            description: options?.description ?? new Date().toISOString(),
            accessKey: _credentials.key,
            secret: _credentials.secret,
            public: options?.public,
        });
        // The service's response decides the final uuid.
        this.uuid = urlResp.uuid;
        const path = `${this.uuid}.hnsw`;
        await this.save(path);
        return (0, loader_1.upload)({
            path,
            url: urlResp.url,
            onProgress: (progress) => {
                if (options.onProgress) {
                    // Translate the uploader's `uploaded` field into `loaded`.
                    options.onProgress({ loaded: progress.uploaded, total: progress.total });
                }
            },
        });
    }

    /**
     * Writes `args.json`, `docstore.json` and `hnswlib.index` under `directory`.
     *
     * NOTE(review): `this.args` is serialised wholesale, so any `credentials`,
     * `index` or `docstore` entries on it end up in `args.json` — confirm that is
     * intended before the files are uploaded.
     *
     * @param directory Target directory path.
     */
    async save(directory) {
        // should not add /hnswlib-index to directory, because it is added in wasm inside
        const indexPath = (0, lib_1.pathJoin)(directory, 'hnswlib.index');
        const argsPath = (0, lib_1.pathJoin)(directory, 'args.json');
        const docstorePath = (0, lib_1.pathJoin)(directory, 'docstore.json');
        const argsContent = JSON.stringify(this.args);
        const docstoreContent = JSON.stringify(Array.from(this.docstore._docs.entries()));
        await Promise.all([
            lib_1.IDBFS.writeStringToFile(argsPath, argsContent),
            lib_1.IDBFS.writeStringToFile(docstorePath, docstoreContent),
            this.index.writeIndex(indexPath),
        ]);
    }

    /**
     * Downloads a stored index by uuid and loads it from `<uuid>.hnsw`.
     *
     * @param options Reads `uuid`, `embeddings`, `public`, `credentials` and `onProgress`.
     * @returns A loaded instance with `uuid` set to `options.uuid`.
     */
    static async loadFromCloud(options) {
        const { embeddings } = options;
        const urlResp = options?.public
            ? await (0, lib_1.createPublicGetFileOperationUrl)({
                uuid: options.uuid,
                accessKey: options.credentials?.key,
            })
            : await (0, lib_1.createGetFileOperationUrl)({
                uuid: options.uuid,
                accessKey: options.credentials?.key,
                secret: options.credentials?.secret,
            });
        await (0, loader_1.download)({
            url: urlResp.url,
            onProgress: options.onProgress,
        });
        const instance = await CloseVectorHNSWWeb.load(`${options.uuid}.hnsw`, embeddings);
        instance.uuid = options.uuid;
        return instance;
    }

    /**
     * Rebuilds a store from the files written by `save`.
     *
     * @param directory Directory containing `args.json`, `docstore.json` and `hnswlib.index`.
     * @param embeddings Embeddings provider for the new instance.
     * @returns A new `CloseVectorHNSWWeb` using the loaded index and docstore.
     */
    static async load(directory, embeddings) {
        const argsPath = (0, lib_1.pathJoin)(directory, 'args.json');
        const docstorePath = (0, lib_1.pathJoin)(directory, 'docstore.json');
        const indexPath = (0, lib_1.pathJoin)(directory, 'hnswlib.index');
        const lib = await CloseVectorHNSWWeb.imports();
        const args = JSON.parse(lib.EmscriptenFileSystemManager.getStringFromFile(argsPath));
        const docs = JSON.parse(lib.EmscriptenFileSystemManager.getStringFromFile(docstorePath));
        const index = await CloseVectorHNSWWeb.getHierarchicalNSW({ ...args });
        // Fall back to the number of stored documents when no capacity was persisted.
        await index.readIndex(indexPath, args.maxElements || docs.length);
        // `docs` is the array of [key, document] entries written by `save`.
        args.docstore = new closevector_common_1.SynchronousInMemoryDocstore(new Map(docs));
        args.index = index;
        return new CloseVectorHNSWWeb(embeddings, args);
    }

    /**
     * Builds a store from raw texts.
     *
     * @param texts Array of strings, one per document.
     * @param metadatas Either one metadata value per text (array) or a single
     *   value shared by all documents.
     * @param embeddings Embeddings provider.
     * @param dbConfig Optional; only `docstore` is read (by `fromDocuments`).
     */
    static async fromTexts(texts, metadatas, embeddings, dbConfig) {
        const docs = [];
        for (let i = 0; i < texts.length; i += 1) {
            const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas;
            docs.push({ pageContent: texts[i], metadata });
        }
        return CloseVectorHNSWWeb.fromDocuments(docs, embeddings, dbConfig);
    }

    /**
     * Builds a store from documents, using the `'cosine'` space and a capacity
     * equal to the number of documents.
     *
     * @param docs Documents to embed and add.
     * @param embeddings Embeddings provider.
     * @param dbConfig Optional; only `docstore` is read.
     */
    static async fromDocuments(docs, embeddings, dbConfig) {
        const args = {
            docstore: dbConfig?.docstore,
            space: 'cosine',
            maxElements: docs.length,
        };
        const instance = new this(embeddings, args);
        await instance.addDocuments(docs);
        return instance;
    }

    /**
     * Loads the hnswlib wasm library, reusing the module-level cache after the
     * first successful load.
     *
     * @throws {Error} Wrapping the loader failure; the original error is kept as `cause`.
     */
    static async imports() {
        try {
            if (__lib) {
                return __lib;
            }
            const lib = await (0, closevector_hnswlib_wasm_1.loadHnswlib)();
            __lib = lib;
            return lib;
        }
        catch (err) {
            // Keep the original error reachable for debugging instead of only its message.
            throw new Error(`Could not import closevector-hnswlib-wasm.\nError: ${err?.message}`, { cause: err });
        }
    }
}
exports.CloseVectorHNSWWeb = CloseVectorHNSWWeb;
exports.HNSWLib = CloseVectorHNSWWeb;