UNPKG

embeddings-js

Version:

A NodeJS RAG framework to easily work with LLMs and custom datasets

112 lines (111 loc) 4.1 kB
import { Pinecone } from '@pinecone-database/pinecone'; import createDebugMessages from 'debug'; export class PineconeDb { constructor({ projectName, namespace, indexSpec, }) { Object.defineProperty(this, "debug", { enumerable: true, configurable: true, writable: true, value: createDebugMessages('embedjs:vector:PineconeDb') }); Object.defineProperty(this, "client", { enumerable: true, configurable: true, writable: true, value: void 0 }); Object.defineProperty(this, "namespace", { enumerable: true, configurable: true, writable: true, value: void 0 }); Object.defineProperty(this, "projectName", { enumerable: true, configurable: true, writable: true, value: void 0 }); Object.defineProperty(this, "indexSpec", { enumerable: true, configurable: true, writable: true, value: void 0 }); this.client = new Pinecone(); this.projectName = projectName; this.namespace = namespace; this.indexSpec = indexSpec; } async init({ dimensions }) { const list = (await this.client.listIndexes()).indexes.map((i) => i.name); if (list.indexOf(this.projectName) > -1) return; await this.client.createIndex({ name: this.projectName, dimension: dimensions, spec: this.indexSpec, metric: 'cosine', }); } async insertChunks(chunks) { let processed = 0; const index = this.client.Index(this.projectName).namespace(this.namespace); for (let i = 0; i < chunks.length; i += PineconeDb.PINECONE_INSERT_CHUNK_SIZE) { const chunkBatch = chunks.slice(i, i + PineconeDb.PINECONE_INSERT_CHUNK_SIZE); const upsertCommand = chunkBatch.map((chunk) => { return { id: chunk.metadata.id, values: chunk.vector, metadata: { pageContent: chunk.pageContent, ...chunk.metadata }, }; }); this.debug(`Inserting Pinecone batch`); await index.upsert(upsertCommand); processed += chunkBatch.length; } return processed; } async similaritySearch(query, k) { const index = this.client.Index(this.projectName).namespace(this.namespace); const queryResponse = await index.query({ topK: k, vector: query, includeMetadata: true, includeValues: true, }); return queryResponse.matches.map((match) => { const pageContent = match.metadata.pageContent; delete match.metadata.pageContent; return { pageContent, metadata: match.metadata, }; }); } async getVectorCount() { const index = this.client.Index(this.projectName).namespace(this.namespace); return (await index.describeIndexStats()).totalRecordCount; } async deleteKeys(uniqueLoaderId) { const index = await this.client.Index(this.projectName).namespace(this.namespace); try { await index.deleteMany({ uniqueLoaderId: { $eq: uniqueLoaderId } }); return true; } catch (e) { this.debug(`Failed to delete keys for loader '${uniqueLoaderId}'. Pinecone does not allow deleting by metadata filtering in serverless and free (what they call starter) instances`); return false; } } async reset() { await this.client.Index(this.projectName).namespace(this.namespace).deleteAll(); } } Object.defineProperty(PineconeDb, "PINECONE_INSERT_CHUNK_SIZE", { enumerable: true, configurable: true, writable: true, value: 200 }); //Pinecone only allows inserting 2MB worth of chunks at a time; this is an approximation