embeddings-js
Version:
A NodeJS RAG framework to easily work with LLMs and custom datasets
112 lines (111 loc) • 4.1 kB
JavaScript
import { Pinecone } from '@pinecone-database/pinecone';
import createDebugMessages from 'debug';
export class PineconeDb {
constructor({ projectName, namespace, indexSpec, }) {
Object.defineProperty(this, "debug", {
enumerable: true,
configurable: true,
writable: true,
value: createDebugMessages('embedjs:vector:PineconeDb')
});
Object.defineProperty(this, "client", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
Object.defineProperty(this, "namespace", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
Object.defineProperty(this, "projectName", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
Object.defineProperty(this, "indexSpec", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
this.client = new Pinecone();
this.projectName = projectName;
this.namespace = namespace;
this.indexSpec = indexSpec;
}
async init({ dimensions }) {
const list = (await this.client.listIndexes()).indexes.map((i) => i.name);
if (list.indexOf(this.projectName) > -1)
return;
await this.client.createIndex({
name: this.projectName,
dimension: dimensions,
spec: this.indexSpec,
metric: 'cosine',
});
}
async insertChunks(chunks) {
let processed = 0;
const index = this.client.Index(this.projectName).namespace(this.namespace);
for (let i = 0; i < chunks.length; i += PineconeDb.PINECONE_INSERT_CHUNK_SIZE) {
const chunkBatch = chunks.slice(i, i + PineconeDb.PINECONE_INSERT_CHUNK_SIZE);
const upsertCommand = chunkBatch.map((chunk) => {
return {
id: chunk.metadata.id,
values: chunk.vector,
metadata: { pageContent: chunk.pageContent, ...chunk.metadata },
};
});
this.debug(`Inserting Pinecone batch`);
await index.upsert(upsertCommand);
processed += chunkBatch.length;
}
return processed;
}
async similaritySearch(query, k) {
const index = this.client.Index(this.projectName).namespace(this.namespace);
const queryResponse = await index.query({
topK: k,
vector: query,
includeMetadata: true,
includeValues: true,
});
return queryResponse.matches.map((match) => {
const pageContent = match.metadata.pageContent;
delete match.metadata.pageContent;
return {
pageContent,
metadata: match.metadata,
};
});
}
async getVectorCount() {
const index = this.client.Index(this.projectName).namespace(this.namespace);
return (await index.describeIndexStats()).totalRecordCount;
}
async deleteKeys(uniqueLoaderId) {
const index = await this.client.Index(this.projectName).namespace(this.namespace);
try {
await index.deleteMany({ uniqueLoaderId: { $eq: uniqueLoaderId } });
return true;
}
catch (e) {
this.debug(`Failed to delete keys for loader '${uniqueLoaderId}'.
Pinecone does not allow deleting by metadata filtering in serverless and free (what they call starter) instances`);
return false;
}
}
async reset() {
await this.client.Index(this.projectName).namespace(this.namespace).deleteAll();
}
}
Object.defineProperty(PineconeDb, "PINECONE_INSERT_CHUNK_SIZE", {
enumerable: true,
configurable: true,
writable: true,
value: 200
}); //Pinecone only allows inserting 2MB worth of chunks at a time; this is an approximation