UNPKG

vexify

Version:

Portable vector database with in-process ONNX embeddings. Zero-config semantic search via SQLite. No external servers required.

194 lines (165 loc) 5.43 kB
'use strict';

const Database = require('better-sqlite3');
const sqliteVec = require('sqlite-vec');
const path = require('path');
const fs = require('fs');

/**
 * Maximum number of `?` placeholders placed in one dynamically built
 * `IN (...)` statement. SQLite caps bound parameters per statement
 * (SQLITE_MAX_VARIABLE_NUMBER, historically 999), so larger inputs are
 * processed in batches of this size.
 */
const MAX_BOUND_PARAMETERS = 500;

/**
 * Split an array into consecutive slices of at most `size` elements.
 *
 * @param {Array} items - Values to split.
 * @param {number} size - Maximum slice length (must be > 0).
 * @returns {Array<Array>} Slices in original order; empty when `items` is empty.
 */
function chunk(items, size) {
  const chunks = [];
  for (let start = 0; start < items.length; start += size) {
    chunks.push(items.slice(start, start + size));
  }
  return chunks;
}

/**
 * Build the positional parameters for the `put` statement from a document.
 *
 * The vector is stored as the raw bytes of a Float32Array; `content` and
 * `metadata` are stored as JSON text. A falsy `metadata` and an undefined
 * `content` are stored as SQL NULL.
 *
 * @param {object} doc - Document with `id`, `vector`, `checksum`, `version`
 *   and optional `content` / `metadata`.
 * @returns {Array} Parameters in column order:
 *   id, vector, content, metadata, checksum, version.
 */
function toRowParams(doc) {
  const vectorBlob = Buffer.from(new Float32Array(doc.vector).buffer);
  const contentJson = doc.content !== undefined ? JSON.stringify(doc.content) : null;
  const metadataJson = doc.metadata ? JSON.stringify(doc.metadata) : null;
  return [doc.id, vectorBlob, contentJson, metadataJson, doc.checksum, doc.version];
}

/**
 * Decode a vector BLOB (raw Float32Array bytes) into a plain number array.
 *
 * A Buffer is only a view: its backing ArrayBuffer may be larger than the
 * Buffer and the view may start at a non-zero, unaligned offset. Copying the
 * view's own bytes first yields an exactly sized, aligned buffer, so the
 * Float32Array covers precisely the stored bytes.
 *
 * @param {Uint8Array} blob - Stored vector bytes (a Node Buffer qualifies).
 * @returns {number[]} Decoded float values.
 * @throws {RangeError} If the byte length is not a multiple of 4.
 */
function blobToVector(blob) {
  const bytes = new Uint8Array(blob); // copies exactly blob.byteLength bytes
  return Array.from(new Float32Array(bytes.buffer));
}

/**
 * Storage adapter that persists documents (id, vector, content, metadata,
 * checksum, version) in a single SQLite `documents` table.
 *
 * Methods are declared `async` but perform their database work synchronously
 * through better-sqlite3.
 */
class SQLiteStorageAdapter {
  /**
   * Open (creating if necessary) the database file, load the sqlite-vec
   * extension and make sure the schema and prepared statements exist.
   *
   * @param {string} [dbPath='./vecstore.db'] - Path of the database file.
   *   Missing parent directories are created.
   */
  constructor(dbPath = './vecstore.db') {
    this.dbPath = dbPath;

    const dir = path.dirname(dbPath);
    if (!fs.existsSync(dir) && dir !== '.') {
      fs.mkdirSync(dir, { recursive: true });
    }

    this.db = new Database(dbPath);
    this.db.loadExtension(sqliteVec.getLoadablePath());
    this.preparedStatements = {};
    this.init();
  }

  /**
   * Create the `documents` table and its indexes if they do not exist, and
   * prepare every statement whose SQL text is fixed so it is compiled once
   * instead of on each call.
   */
  init() {
    this.db.exec(`
      CREATE TABLE IF NOT EXISTS documents (
        id TEXT PRIMARY KEY,
        vector BLOB NOT NULL,
        content TEXT,
        metadata TEXT,
        checksum TEXT NOT NULL,
        version TEXT NOT NULL,
        created_at INTEGER DEFAULT (strftime('%s', 'now'))
      );
      CREATE INDEX IF NOT EXISTS idx_checksum ON documents(checksum);
      CREATE INDEX IF NOT EXISTS idx_version ON documents(version);
    `);

    const statements = this.preparedStatements;

    statements.put = this.db.prepare(`
      INSERT OR REPLACE INTO documents (id, vector, content, metadata, checksum, version)
      VALUES (?, ?, ?, ?, ?, ?)
    `);
    statements.getByChecksum = this.db.prepare(
      'SELECT id FROM documents WHERE checksum = ?'
    );
    statements.checksumExists = this.db.prepare(
      'SELECT COUNT(*) as count FROM documents WHERE checksum = ?'
    );
    statements.getBySource = this.db.prepare(`
      SELECT id FROM documents
      WHERE json_extract(metadata, ?) = ?
    `);
    statements.getCrawledUrls = this.db.prepare(`
      SELECT DISTINCT json_extract(metadata, '$.crawlUrl') as url
      FROM documents
      WHERE json_extract(metadata, '$.source') = 'crawl'
        AND json_extract(metadata, '$.crawlUrl') IS NOT NULL
    `);
    statements.getCrawledUrlsWithHash = this.db.prepare(`
      SELECT
        json_extract(metadata, '$.crawlUrl') as url,
        json_extract(metadata, '$.contentHash') as hash
      FROM documents
      WHERE json_extract(metadata, '$.source') = 'crawl'
        AND json_extract(metadata, '$.crawlUrl') IS NOT NULL
      GROUP BY json_extract(metadata, '$.crawlUrl')
    `);
    statements.deleteByCrawlUrl = this.db.prepare(`
      DELETE FROM documents
      WHERE json_extract(metadata, '$.crawlUrl') = ?
    `);
    statements.getAll = this.db.prepare('SELECT * FROM documents');
    statements.delete = this.db.prepare('DELETE FROM documents WHERE id = ?');
  }

  /**
   * Insert a document, replacing any existing row with the same id.
   *
   * @param {object} doc - Document to store (see `toRowParams`).
   * @returns {Promise<void>}
   */
  async put(doc) {
    this.preparedStatements.put.run(...toRowParams(doc));
  }

  /**
   * Insert or replace many documents inside a single transaction, so either
   * all of them are written or none are.
   *
   * @param {object[]} docs - Documents to store.
   * @returns {Promise<void>}
   */
  async putBatch(docs) {
    const insertAll = this.db.transaction((documents) => {
      for (const doc of documents) {
        this.preparedStatements.put.run(...toRowParams(doc));
      }
    });
    insertAll(docs);
  }

  /**
   * Look up the id of a document by checksum.
   *
   * @param {string} checksum - Checksum to search for.
   * @returns {Promise<string|null>} Id of a matching row, or null if none.
   */
  async getByChecksum(checksum) {
    const row = this.preparedStatements.getByChecksum.get(checksum);
    return row ? row.id : null;
  }

  /**
   * Report whether at least one document has the given checksum.
   *
   * @param {string} checksum - Checksum to search for.
   * @returns {Promise<boolean>}
   */
  async checksumExists(checksum) {
    const result = this.preparedStatements.checksumExists.get(checksum);
    return result.count > 0;
  }

  /**
   * Return the subset of `checksums` that is present in the table.
   *
   * The lookup runs in batches so that arbitrarily long inputs never exceed
   * SQLite's per-statement bound-parameter limit. Results are de-duplicated;
   * no particular ordering is guaranteed.
   *
   * @param {string[]} checksums - Candidate checksums.
   * @returns {Promise<string[]>} Distinct checksums that exist.
   */
  async getExistingChecksums(checksums) {
    if (checksums.length === 0) return [];

    const found = new Set();
    for (const batch of chunk(checksums, MAX_BOUND_PARAMETERS)) {
      const placeholders = batch.map(() => '?').join(',');
      const rows = this.db
        .prepare(`SELECT DISTINCT checksum FROM documents WHERE checksum IN (${placeholders})`)
        .all(...batch);
      for (const row of rows) {
        found.add(row.checksum);
      }
    }
    return [...found];
  }

  /**
   * Find ids of documents whose metadata has `sourceType` equal to
   * `sourceValue`. The lookup uses the JSON path `$.<sourceType>`, passed as
   * a bound parameter rather than spliced into the SQL text.
   *
   * @param {string} sourceType - Metadata key (appended to `$.`).
   * @param {*} sourceValue - Value the key must equal.
   * @returns {Promise<string[]>} Matching document ids.
   */
  async getBySource(sourceType, sourceValue) {
    const rows = this.preparedStatements.getBySource.all(`$.${sourceType}`, sourceValue);
    return rows.map((row) => row.id);
  }

  /**
   * Collect the distinct `crawlUrl` values of documents whose metadata
   * `source` is `'crawl'`.
   *
   * @returns {Promise<Set<string>>} Crawled URLs.
   */
  async getCrawledUrls() {
    const rows = this.preparedStatements.getCrawledUrls.all();
    return new Set(rows.map((row) => row.url));
  }

  /**
   * Map each crawled URL to a `contentHash` taken from its metadata.
   *
   * Rows are grouped by URL; when several documents share a URL the hash
   * comes from whichever row SQLite picks for the group.
   *
   * @returns {Promise<Map<string, *>>} URL -> content hash.
   */
  async getCrawledUrlsWithHash() {
    const rows = this.preparedStatements.getCrawledUrlsWithHash.all();
    return new Map(rows.map((row) => [row.url, row.hash]));
  }

  /**
   * Delete every document whose id is in `ids`.
   *
   * Ids are deleted in batches (to respect SQLite's bound-parameter limit)
   * inside one transaction, so the deletion remains all-or-nothing.
   *
   * @param {string[]} ids - Ids to delete; an empty array is a no-op.
   * @returns {Promise<void>}
   */
  async deleteByIds(ids) {
    if (ids.length === 0) return;

    const removeAll = this.db.transaction((batches) => {
      for (const batch of batches) {
        const placeholders = batch.map(() => '?').join(',');
        this.db
          .prepare(`DELETE FROM documents WHERE id IN (${placeholders})`)
          .run(...batch);
      }
    });
    removeAll(chunk(ids, MAX_BOUND_PARAMETERS));
  }

  /**
   * Delete all documents whose metadata `crawlUrl` equals `url`.
   *
   * @param {string} url - Crawl URL to remove.
   * @returns {Promise<number>} Number of rows deleted.
   */
  async deleteByCrawlUrl(url) {
    const result = this.preparedStatements.deleteByCrawlUrl.run(url);
    return result.changes;
  }

  /**
   * Load every stored document.
   *
   * `content` and `metadata` are JSON-parsed and only present on the result
   * when the stored column is not NULL.
   *
   * @returns {Promise<object[]>} Documents with `id`, `vector` (number[]),
   *   `version`, `checksum` and optional `content` / `metadata`.
   */
  async getAll() {
    const rows = this.preparedStatements.getAll.all();
    return rows.map((row) => {
      const doc = {
        id: row.id,
        vector: blobToVector(row.vector),
        version: row.version,
        checksum: row.checksum,
      };
      if (row.content !== null) {
        doc.content = JSON.parse(row.content);
      }
      if (row.metadata !== null) {
        doc.metadata = JSON.parse(row.metadata);
      }
      return doc;
    });
  }

  /**
   * Delete the document with the given id, if it exists.
   *
   * @param {string} id - Document id.
   * @returns {Promise<void>}
   */
  async delete(id) {
    this.preparedStatements.delete.run(id);
  }

  /** Close the underlying database connection. */
  close() {
    this.db.close();
  }
}

module.exports = { SQLiteStorageAdapter };