embeddings-js
Version:
A NodeJS RAG framework to easily work with LLMs and custom datasets
96 lines (95 loc) • 3.43 kB
JavaScript
import * as fsOld from 'node:fs';
import * as fs from 'node:fs/promises';
import { connect } from 'vectordb';
export class LanceDb {
constructor({ path, isTemp }) {
Object.defineProperty(this, "isTemp", {
enumerable: true,
configurable: true,
writable: true,
value: true
});
Object.defineProperty(this, "path", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
Object.defineProperty(this, "table", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
this.isTemp = isTemp !== undefined ? isTemp : false;
this.path = path;
}
async init({ dimensions }) {
if (!this.isTemp && !fsOld.existsSync(this.path)) {
await fs.mkdir(this.path);
}
const dir = await (this.isTemp ? fs.mkdtemp(this.path) : this.path);
const client = await connect(dir);
const list = await client.tableNames();
if (list.indexOf(LanceDb.STATIC_DB_NAME) > -1)
this.table = await client.openTable(LanceDb.STATIC_DB_NAME);
else {
//TODO: You can add a proper schema instead of a sample record now but it requires another package apache-arrow; another install on downstream as well
this.table = await client.createTable(LanceDb.STATIC_DB_NAME, [
{
id: 'md5',
pageContent: 'sample',
vector: Array(dimensions),
uniqueLoaderId: 'sample',
metadata: 'sample',
},
]);
}
}
async insertChunks(chunks) {
const mapped = chunks.map((chunk) => {
const uniqueLoaderId = chunk.metadata.uniqueLoaderId;
delete chunk.metadata.uniqueLoaderId;
return {
id: chunk.metadata.id,
pageContent: chunk.pageContent,
vector: chunk.vector,
uniqueLoaderId,
metadata: JSON.stringify(chunk.metadata),
};
});
await this.table.add(mapped);
return mapped.length; //TODO: check if vectorDb has addressed the issue where add returns undefined
}
async similaritySearch(query, k) {
const results = await this.table.search(query).limit(k).execute();
return (results
//a mandatory record is required by lance during init to get schema
//and this record is also returned in results; we filter it out
.filter((entry) => entry.id !== 'md5')
.map((result) => {
const metadata = JSON.parse(result.metadata);
metadata.uniqueLoaderId = result.uniqueLoaderId;
return {
pageContent: result.pageContent,
metadata,
};
}));
}
async getVectorCount() {
return this.table.countRows();
}
async deleteKeys(uniqueLoaderId) {
await this.table.delete(`\`uniqueLoaderId\` = "${uniqueLoaderId}"`);
return true;
}
async reset() {
await this.table.delete('id IS NOT NULL');
}
}
Object.defineProperty(LanceDb, "STATIC_DB_NAME", {
enumerable: true,
configurable: true,
writable: true,
value: 'vectors'
});