UNPKG

embeddings-js

Version:

A NodeJS RAG framework to easily work with LLMs and custom datasets

142 lines (141 loc) 4.97 kB
import createDebugMessages from 'debug';
import weaviate, { ApiKey, generateUuid5 } from 'weaviate-ts-client';
import { toTitleCase } from '../util/strings.js';

/**
 * Vector database adapter that stores and queries embedded chunks in a Weaviate class.
 *
 * Call `init()` before any other method so the backing class exists in the schema.
 */
export class WeaviateDb {
    /**
     * @param {object} options
     * @param {string} options.host - Weaviate host; the client is always created with the `https` scheme.
     * @param {string} options.apiKey - API key wrapped in the client's `ApiKey` credential.
     * @param {string} options.className - Name of the Weaviate class to use (normalised via `toTitleCase`).
     */
    constructor({ host, apiKey, className }) {
        Object.defineProperty(this, "debug", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: createDebugMessages('embedjs:vector:WeaviateDb')
        });
        Object.defineProperty(this, "dimensions", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "className", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "client", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        // @ts-ignore
        this.client = weaviate.client({ scheme: 'https', host, apiKey: new ApiKey(apiKey) });
        // Weaviate translates the className during create to title case and errors at other places
        this.className = toTitleCase(className);
    }

    /**
     * Creates the backing class in the Weaviate schema if it does not exist yet.
     *
     * @param {object} options
     * @param {number} options.dimensions - Remembered so `reset()` can re-run `init()`;
     *   it is not sent to Weaviate by this method.
     * @returns {Promise<void>}
     */
    async init({ dimensions }) {
        this.dimensions = dimensions;

        const { classes: list } = await this.client.schema.getter().do();
        if (list.some((l) => l.class === this.className)) return;

        await this.client.schema
            .classCreator()
            .withClass({
                class: this.className,
                properties: [
                    {
                        // NOTE(review): declared here but never populated by insertChunks — confirm intent.
                        name: 'realId',
                        dataType: ['text'],
                    },
                    {
                        name: 'pageContent',
                        dataType: ['text'],
                    },
                    {
                        name: 'uniqueLoaderId',
                        dataType: ['text'],
                    },
                    {
                        name: 'source',
                        dataType: ['text'],
                    },
                ],
                vectorIndexConfig: {
                    distance: 'cosine',
                },
            })
            .do();
    }

    /**
     * Inserts chunks in batches of at most `WeaviateDb.WEAVIATE_INSERT_CHUNK_SIZE` objects.
     *
     * Each object's id is a UUIDv5 derived from `chunk.metadata.id`; the remaining metadata
     * keys are stored as properties next to `pageContent`. The input chunks are not modified.
     *
     * @param {Array<{ pageContent: string, vector: number[], metadata: object }>} chunks
     * @returns {Promise<number>} Number of chunks submitted (per-object errors are only logged).
     */
    async insertChunks(chunks) {
        let processed = 0;

        for (let i = 0; i < chunks.length; i += WeaviateDb.WEAVIATE_INSERT_CHUNK_SIZE) {
            const chunkBatch = chunks.slice(i, i + WeaviateDb.WEAVIATE_INSERT_CHUNK_SIZE);
            this.debug(`Inserting Weaviate batch`);

            // A new batcher is created for every request: withObjects() appends to the
            // batcher's internal list, so reusing one instance would re-send earlier batches.
            const result = await this.client.batch
                .objectsBatcher()
                .withObjects(
                    ...chunkBatch.map((chunk) => {
                        // Split the id off without mutating the caller's metadata object;
                        // the id becomes the object id instead of a property.
                        const { id: chunkId, ...metadata } = chunk.metadata;

                        return {
                            class: this.className,
                            id: generateUuid5(chunkId),
                            vector: chunk.vector,
                            properties: {
                                uniqueLoaderId: metadata.uniqueLoaderId,
                                pageContent: chunk.pageContent,
                                ...metadata,
                            },
                        };
                    }),
                )
                .do();

            this.debug(
                'Weaviate errors',
                result.map((r) => r.result?.errors?.error?.[0]?.message ?? 'NONE'),
            );
            processed += chunkBatch.length;
        }

        return processed;
    }

    /**
     * Returns the `k` nearest stored chunks to the given query vector.
     *
     * @param {number[]} query - Query embedding passed to `nearVector`.
     * @param {number} k - Maximum number of matches to return.
     * @returns {Promise<Array<{ pageContent: string, metadata: object }>>} Matches, with the
     *   other requested fields (`uniqueLoaderId`, `source`) collected under `metadata`.
     */
    async similaritySearch(query, k) {
        const queryResponse = await this.client.graphql
            .get()
            .withClassName(this.className)
            .withNearVector({ vector: query })
            .withFields('uniqueLoaderId pageContent source')
            .withLimit(k)
            .do();

        return queryResponse.data.Get[this.className].map(({ pageContent, ...metadata }) => ({
            pageContent,
            metadata,
        }));
    }

    /**
     * @returns {Promise<number>} Object count reported by an aggregate query on the class.
     */
    async getVectorCount() {
        const queryResponse = await this.client.graphql
            .aggregate()
            .withClassName(this.className)
            .withFields('meta { count }')
            .do();

        return queryResponse.data.Aggregate[this.className][0].meta.count;
    }

    /**
     * Batch-deletes every object whose `uniqueLoaderId` matches the given value.
     *
     * @param {string} uniqueLoaderId
     * @returns {Promise<boolean>} Always `true` once the delete request resolves.
     */
    async deleteKeys(uniqueLoaderId) {
        await this.client.batch
            .objectsBatchDeleter()
            .withClassName(this.className)
            .withWhere({
                path: ['uniqueLoaderId'],
                operator: 'ContainsAny',
                valueTextArray: [uniqueLoaderId],
            })
            .do();
        return true;
    }

    /**
     * Deletes the class from the schema and recreates it with the remembered dimensions.
     *
     * @returns {Promise<void>}
     */
    async reset() {
        await this.client.schema.classDeleter().withClassName(this.className).do();
        await this.init({ dimensions: this.dimensions });
    }
}

// Maximum number of objects sent to Weaviate in a single batch request.
Object.defineProperty(WeaviateDb, "WEAVIATE_INSERT_CHUNK_SIZE", {
    enumerable: true,
    configurable: true,
    writable: true,
    value: 500
});