hnswsqlite
Version:
Vector search with HNSWlib and SQLite in TypeScript.
150 lines (149 loc) • 5.79 kB
JavaScript
"use strict";
// CommonJS/ES-module interop helper (TypeScript emit): reuse a helper already
// attached to `this` if one exists, otherwise define it locally.
var __importDefault = (this && this.__importDefault) || function (imported) {
    // Modules flagged as ES modules already carry their own `default` export.
    if (imported && imported.__esModule) {
        return imported;
    }
    // Anything else is exposed as the `default` member of a wrapper object.
    return { "default": imported };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.VectorStore = void 0;
const hnswlib_node_1 = require("hnswlib-node");
const better_sqlite3_1 = __importDefault(require("better-sqlite3"));
/**
 * Document store with vector similarity search.
 *
 * Documents (text plus embedding) are persisted in a SQLite `documents` table;
 * nearest-neighbour queries are answered by an in-memory HNSW index that is
 * rebuilt from the table every time a store is constructed. The index itself
 * is never written to disk.
 */
class VectorStore {
    /**
     * Open (or create) the database and build the in-memory index from it.
     *
     * @param {string} dbPath - Path handed to better-sqlite3.
     * @param {number} [dim=1536] - Number of dimensions of every embedding.
     */
    constructor(dbPath, dim = 1536) {
        // NOTE(review): `verbose` makes better-sqlite3 log every executed SQL
        // statement via console.log; kept for compatibility, consider making it opt-in.
        this.db = new better_sqlite3_1.default(dbPath, { verbose: console.log });
        try {
            this.db.pragma('journal_mode = WAL'); // Better concurrency
            this.db.pragma('synchronous = NORMAL'); // Better write performance
            this.db.pragma('cache_size = -2000'); // Negative value = size in KiB (~2MB)
            this.dim = dim;
            // Labels that are currently live in the HNSW index.
            this.idMap = new Set();
            this._initTables();
            this._initPreparedStatements();
            this._initIndex();
            this._loadEmbeddings();
        }
        catch (error) {
            // Do not leak the open database handle when initialisation fails.
            this.db.close();
            throw error;
        }
    }
    /** Create the `documents` table (and its id index) if they do not exist yet. */
    _initTables() {
        // The explicit index duplicates the INTEGER PRIMARY KEY lookup; it is
        // kept so databases created by earlier versions keep the same schema.
        this.db.exec(`
            CREATE TABLE IF NOT EXISTS documents (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                text TEXT,
                embedding BLOB
            );
            CREATE INDEX IF NOT EXISTS idx_documents_id ON documents(id);
        `);
    }
    /** Compile every SQL statement the store uses once, up front. */
    _initPreparedStatements() {
        this.preparedStatements = {
            getDocument: this.db.prepare('SELECT id, text FROM documents WHERE id = ?'),
            searchDocuments: this.db.prepare('SELECT id, text FROM documents WHERE id = ?'),
            insertDocument: this.db.prepare('INSERT INTO documents (text, embedding) VALUES (?, ?)'),
            deleteDocument: this.db.prepare('DELETE FROM documents WHERE id = ?'),
            getAllEmbeddings: this.db.prepare('SELECT id, embedding FROM documents')
        };
    }
    /** Create an empty HNSW index using the 'l2' space with an initial capacity of 10000. */
    _initIndex() {
        this.index = new hnswlib_node_1.HierarchicalNSW('l2', this.dim);
        this.index.initIndex(10000); // initial capacity, grown on demand by _ensureCapacity
    }
    /**
     * Grow the index when `additional` more points would not fit.
     * Capacity at least doubles on each resize so repeated inserts stay cheap.
     *
     * @param {number} additional - Number of points about to be added.
     */
    _ensureCapacity(additional) {
        const needed = this.index.getCurrentCount() + additional;
        const capacity = this.index.getMaxElements();
        if (needed > capacity) {
            this.index.resizeIndex(Math.max(needed, capacity * 2));
        }
    }
    /**
     * Convert an embedding (array, typed array or other iterable of numbers)
     * into the plain number array passed to the index, checking its length.
     *
     * @param {Iterable<number>} embedding
     * @returns {number[]}
     * @throws {Error} When the embedding does not have `this.dim` entries.
     */
    _toVector(embedding) {
        const vector = Array.from(embedding);
        if (vector.length !== this.dim) {
            throw new Error(`Expected an embedding with ${this.dim} dimensions, got ${vector.length}`);
        }
        return vector;
    }
    /**
     * Decode a stored BLOB back into float32 values.
     *
     * Only the blob's own byte range is used: a Buffer may be a view into a
     * larger (pooled) ArrayBuffer, so reading `blob.buffer` as a whole would
     * yield unrelated bytes. Slicing also gives an aligned copy.
     *
     * @param {Buffer} blob
     * @returns {Float32Array}
     */
    _decodeEmbedding(blob) {
        const start = blob.byteOffset;
        return new Float32Array(blob.buffer.slice(start, start + blob.byteLength));
    }
    /**
     * Populate the in-memory index from every row in the database.
     * Rows whose embedding cannot be decoded or has the wrong dimension are
     * logged and skipped so one bad row does not prevent the store from opening.
     */
    _loadEmbeddings() {
        const rows = this.preparedStatements.getAllEmbeddings.all();
        this._ensureCapacity(rows.length);
        for (const row of rows) {
            try {
                const vector = this._toVector(this._decodeEmbedding(row.embedding));
                this.index.addPoint(vector, row.id);
                this.idMap.add(row.id);
            }
            catch (error) {
                console.error(`Error loading embedding for document ${row.id}:`, error);
            }
        }
    }
    /**
     * Store a document and make it searchable.
     *
     * @param {string} text - Document text.
     * @param {Iterable<number>} embedding - Embedding with `dim` entries.
     * @returns {number} The id assigned to the new document.
     * @throws {Error} When the embedding has the wrong number of dimensions;
     *   nothing is written in that case.
     */
    addDocument(text, embedding) {
        // Validate before touching the database so a bad vector leaves no orphan row.
        const vector = this._toVector(embedding);
        const embBuf = Buffer.from(new Float32Array(vector).buffer);
        // The row insert is rolled back if adding the point to the index throws.
        const insert = this.db.transaction(() => {
            const result = this.preparedStatements.insertDocument.run(text, embBuf);
            const newId = Number(result.lastInsertRowid);
            this._ensureCapacity(1);
            this.index.addPoint(vector, newId);
            return newId;
        });
        const docId = insert();
        this.idMap.add(docId);
        return docId;
    }
    /**
     * Search for similar documents using vector similarity
     * @param embedding - The query embedding vector (must have `dim` entries)
     * @param k - Number of nearest neighbors to return
     * @returns Array of matching documents with id and text, nearest first;
     *   at most as many as there are live documents
     */
    search(embedding, k = 5) {
        const vector = this._toVector(embedding);
        // Never ask the index for more neighbours than there are live documents.
        const limit = Math.min(k, this.idMap.size);
        if (!(limit > 0)) {
            return [];
        }
        const { neighbors } = this.index.searchKnn(vector, limit);
        // Read all rows inside one transaction so they come from one snapshot.
        const fetchRows = this.db.transaction((ids) => {
            return ids.map((id) => this.preparedStatements.searchDocuments.get(id));
        });
        // Drop ids that no longer have a row in the database.
        return fetchRows(neighbors).filter(Boolean);
    }
    /**
     * Remove an id from the live set and mark it deleted in the HNSW index so
     * it stops appearing in search results. Unknown ids are ignored.
     *
     * @param {number} id
     */
    _removeFromIndex(id) {
        if (!this.idMap.has(id)) {
            return;
        }
        this.idMap.delete(id);
        try {
            this.index.markDelete(id);
        }
        catch (error) {
            // The row is already gone; search() filters ids without a row anyway.
            console.error(`Error removing document ${id} from index:`, error);
        }
    }
    /**
     * Delete a document by ID
     * @param id - The ID of the document to delete
     * @returns boolean indicating whether a row was deleted (false on error)
     */
    deleteDocument(id) {
        try {
            // A single DELETE is atomic on its own; no explicit BEGIN/COMMIT is
            // issued, so this is also safe to call while a transaction is open.
            const result = this.preparedStatements.deleteDocument.run(id);
            this._removeFromIndex(id);
            return result.changes > 0;
        }
        catch (error) {
            console.error('Error deleting document:', error);
            return false;
        }
    }
    /**
     * Batch delete multiple documents
     *
     * All rows are deleted in one transaction; if any delete throws, the whole
     * batch is rolled back, the index is left untouched and 0 is returned.
     *
     * @param ids - Array of document IDs to delete
     * @returns number of successfully deleted documents
     */
    deleteDocuments(ids) {
        if (!ids.length)
            return 0;
        try {
            const removeRows = this.db.transaction((batch) => {
                return batch.filter((id) => this.preparedStatements.deleteDocument.run(id).changes > 0);
            });
            const removed = removeRows(ids);
            // Only touch the index once the database changes are committed.
            for (const id of ids) {
                this._removeFromIndex(id);
            }
            return removed.length;
        }
        catch (error) {
            console.error('Error in batch delete:', error);
            return 0;
        }
    }
    /** Close the underlying database connection. */
    close() {
        this.db.close();
    }
}
exports.VectorStore = VectorStore;