vexify
Version:
Portable vector database with in-process ONNX embeddings. Zero-config semantic search via SQLite. No external servers required.
194 lines (165 loc) • 5.43 kB
JavaScript
;
const Database = require('better-sqlite3');
const sqliteVec = require('sqlite-vec');
const path = require('path');
const fs = require('fs');
// Upper bound on bound parameters per statement. SQLite builds older than
// 3.32.0 default SQLITE_MAX_VARIABLE_NUMBER to 999, so stay well below that.
const MAX_BOUND_PARAMETERS = 500;

/**
 * Serialises a numeric vector into a BLOB of 32-bit floats.
 * Typed arrays use the platform byte order, so the same order is assumed
 * when the BLOB is decoded again.
 * @param {ArrayLike<number>} vector
 * @returns {Buffer}
 */
function encodeVector(vector) {
  return Buffer.from(new Float32Array(vector).buffer);
}

/**
 * Decodes a BLOB written by `encodeVector` back into a plain number array.
 * @param {Uint8Array} blob - Buffer (or other byte view) holding float32 values.
 * @returns {number[]}
 * @throws {RangeError} If the BLOB length is not a multiple of 4 bytes.
 */
function decodeVector(blob) {
  // A Buffer may be a view into a larger (pooled) ArrayBuffer and may start at
  // an offset that is not 4-byte aligned. Copying the bytes first yields a
  // buffer that holds exactly this BLOB, starting at offset 0.
  const bytes = new Uint8Array(blob);
  return Array.from(new Float32Array(bytes.buffer));
}

/**
 * Builds the positional parameters for the `put` statement from a document.
 * @param {object} doc
 * @returns {Array} [id, vector, content, metadata, checksum, version]
 */
function toRowParameters(doc) {
  return [
    doc.id,
    encodeVector(doc.vector),
    doc.content !== undefined ? JSON.stringify(doc.content) : null,
    doc.metadata ? JSON.stringify(doc.metadata) : null,
    doc.checksum,
    doc.version,
  ];
}

/**
 * Splits an array into consecutive slices of at most `size` elements.
 * @param {Array} values
 * @param {number} [size=MAX_BOUND_PARAMETERS]
 * @returns {Array[]}
 */
function splitIntoBatches(values, size = MAX_BOUND_PARAMETERS) {
  const batches = [];
  for (let start = 0; start < values.length; start += size) {
    batches.push(values.slice(start, start + size));
  }
  return batches;
}

/**
 * Document store backed by a SQLite file (via better-sqlite3).
 *
 * Each document row holds an id, a float32 vector BLOB, JSON-encoded content
 * and metadata, a checksum and a version. The methods are declared `async`
 * but perform their database work synchronously.
 */
class SQLiteStorageAdapter {
  /**
   * Opens (creating if necessary) the database at `dbPath`, loads the
   * sqlite-vec extension and prepares the schema and statements.
   * @param {string} [dbPath='./vecstore.db'] - Path of the database file.
   */
  constructor(dbPath = './vecstore.db') {
    this.dbPath = dbPath;
    const dir = path.dirname(dbPath);
    if (!fs.existsSync(dir) && dir !== '.') {
      fs.mkdirSync(dir, { recursive: true });
    }
    this.db = new Database(dbPath);
    this.preparedStatements = {};
    try {
      this.db.loadExtension(sqliteVec.getLoadablePath());
      this.init();
    } catch (err) {
      // Do not leak the open handle when setup fails half-way.
      this.db.close();
      throw err;
    }
  }

  /**
   * Creates the `documents` table and its indexes if they are missing, then
   * prepares every fixed-SQL statement once so calls do not recompile them.
   */
  init() {
    this.db.exec(`
      CREATE TABLE IF NOT EXISTS documents (
        id TEXT PRIMARY KEY,
        vector BLOB NOT NULL,
        content TEXT,
        metadata TEXT,
        checksum TEXT NOT NULL,
        version TEXT NOT NULL,
        created_at INTEGER DEFAULT (strftime('%s', 'now'))
      );
      CREATE INDEX IF NOT EXISTS idx_checksum ON documents(checksum);
      CREATE INDEX IF NOT EXISTS idx_version ON documents(version);
    `);

    const statements = this.preparedStatements;
    statements.put = this.db.prepare(`
      INSERT OR REPLACE INTO documents (id, vector, content, metadata, checksum, version)
      VALUES (?, ?, ?, ?, ?, ?)
    `);
    statements.getByChecksum = this.db.prepare(
      'SELECT id FROM documents WHERE checksum = ?'
    );
    statements.checksumExists = this.db.prepare(
      'SELECT COUNT(*) as count FROM documents WHERE checksum = ?'
    );
    statements.getBySource = this.db.prepare(`
      SELECT id FROM documents
      WHERE json_extract(metadata, ?) = ?
    `);
    statements.getCrawledUrls = this.db.prepare(`
      SELECT DISTINCT json_extract(metadata, '$.crawlUrl') as url
      FROM documents
      WHERE json_extract(metadata, '$.source') = 'crawl'
      AND json_extract(metadata, '$.crawlUrl') IS NOT NULL
    `);
    statements.getCrawledUrlsWithHash = this.db.prepare(`
      SELECT
        json_extract(metadata, '$.crawlUrl') as url,
        json_extract(metadata, '$.contentHash') as hash
      FROM documents
      WHERE json_extract(metadata, '$.source') = 'crawl'
      AND json_extract(metadata, '$.crawlUrl') IS NOT NULL
      GROUP BY json_extract(metadata, '$.crawlUrl')
    `);
    statements.deleteByCrawlUrl = this.db.prepare(`
      DELETE FROM documents
      WHERE json_extract(metadata, '$.crawlUrl') = ?
    `);
    statements.getAll = this.db.prepare('SELECT * FROM documents');
    statements.delete = this.db.prepare('DELETE FROM documents WHERE id = ?');
  }

  /**
   * Inserts a document, replacing any existing row with the same id.
   * @param {object} doc - Provides id, vector, checksum, version and
   *   optionally content and metadata (both stored as JSON text).
   */
  async put(doc) {
    this.preparedStatements.put.run(...toRowParameters(doc));
  }

  /**
   * Inserts or replaces several documents inside a single transaction.
   * @param {Iterable<object>} docs - Documents shaped as for `put`.
   */
  async putBatch(docs) {
    const insertAll = this.db.transaction((documents) => {
      for (const doc of documents) {
        this.preparedStatements.put.run(...toRowParameters(doc));
      }
    });
    insertAll(docs);
  }

  /**
   * Looks up a document id by checksum.
   * @param {string} checksum
   * @returns {Promise<string|null>} The id of a matching row, or null.
   */
  async getByChecksum(checksum) {
    const row = this.preparedStatements.getByChecksum.get(checksum);
    return row ? row.id : null;
  }

  /**
   * Reports whether any document carries the given checksum.
   * @param {string} checksum
   * @returns {Promise<boolean>}
   */
  async checksumExists(checksum) {
    const result = this.preparedStatements.checksumExists.get(checksum);
    return result.count > 0;
  }

  /**
   * Filters the given checksums down to those present in the store.
   * @param {string[]} checksums
   * @returns {Promise<string[]>} Distinct checksums found, in no particular order.
   */
  async getExistingChecksums(checksums) {
    if (checksums.length === 0) return [];
    // De-duplicate up front so a value split across batches is reported once.
    const unique = [...new Set(checksums)];
    const found = [];
    for (const batch of splitIntoBatches(unique)) {
      const placeholders = batch.map(() => '?').join(',');
      const stmt = this.db.prepare(`
        SELECT DISTINCT checksum
        FROM documents
        WHERE checksum IN (${placeholders})
      `);
      for (const row of stmt.all(...batch)) {
        found.push(row.checksum);
      }
    }
    return found;
  }

  /**
   * Finds documents whose metadata field `sourceType` equals `sourceValue`.
   * `sourceType` is appended to the JSON path `$.`, so a dot inside it
   * addresses a nested field; it is bound as a parameter, not spliced into SQL.
   * @param {string} sourceType - Metadata key (JSON path suffix).
   * @param {*} sourceValue - Value to compare against.
   * @returns {Promise<string[]>} Matching document ids.
   */
  async getBySource(sourceType, sourceValue) {
    const rows = this.preparedStatements.getBySource.all(`$.${sourceType}`, sourceValue);
    return rows.map((row) => row.id);
  }

  /**
   * Collects the distinct `crawlUrl` values of documents whose metadata
   * `source` is 'crawl'.
   * @returns {Promise<Set<string>>}
   */
  async getCrawledUrls() {
    const rows = this.preparedStatements.getCrawledUrls.all();
    return new Set(rows.map((row) => row.url));
  }

  /**
   * Maps each crawled URL to a `contentHash` from its metadata.
   * When several documents share a URL, the hash comes from whichever row
   * SQLite selects for that group.
   * @returns {Promise<Map<string, *>>}
   */
  async getCrawledUrlsWithHash() {
    const rows = this.preparedStatements.getCrawledUrlsWithHash.all();
    return new Map(rows.map((row) => [row.url, row.hash]));
  }

  /**
   * Deletes every document whose id is listed. Large lists are split into
   * batches that run in one transaction, so the delete stays all-or-nothing.
   * @param {string[]} ids
   */
  async deleteByIds(ids) {
    if (ids.length === 0) return;
    const removeAll = this.db.transaction((batches) => {
      for (const batch of batches) {
        const placeholders = batch.map(() => '?').join(',');
        this.db
          .prepare(`DELETE FROM documents WHERE id IN (${placeholders})`)
          .run(...batch);
      }
    });
    removeAll(splitIntoBatches(ids));
  }

  /**
   * Deletes every document whose metadata `crawlUrl` equals `url`.
   * @param {string} url
   * @returns {Promise<number>} Number of rows removed.
   */
  async deleteByCrawlUrl(url) {
    const result = this.preparedStatements.deleteByCrawlUrl.run(url);
    return result.changes;
  }

  /**
   * Loads every document with its vector decoded to a number array.
   * `content` and `metadata` are JSON-parsed and omitted when stored as NULL.
   * @returns {Promise<object[]>}
   */
  async getAll() {
    const rows = this.preparedStatements.getAll.all();
    return rows.map((row) => ({
      id: row.id,
      vector: decodeVector(row.vector),
      version: row.version,
      checksum: row.checksum,
      ...(row.content !== null && { content: JSON.parse(row.content) }),
      ...(row.metadata !== null && { metadata: JSON.parse(row.metadata) }),
    }));
  }

  /**
   * Deletes the document with the given id, if present.
   * @param {string} id
   */
  async delete(id) {
    this.preparedStatements.delete.run(id);
  }

  /** Closes the underlying database connection. */
  close() {
    this.db.close();
  }
}
// Public API of this module: the SQLite-backed storage adapter class.
module.exports = { SQLiteStorageAdapter };