askexperts
Version:
AskExperts SDK: build and use AI experts - ask them questions and pay with bitcoin on an open protocol
593 lines • 25.3 kB
JavaScript
import { DatabaseSync } from "node:sqlite";
import crypto from "crypto";
import { debugDocstore, debugError } from "../common/debug.js";
/**
* SQLite implementation
*/
export class DocStoreSQLite {
/**
* Creates a new DocStoreSQLite instance
* @param dbPath - Path to the SQLite database file
*/
constructor(dbPath) {
this.BATCH_SIZE = 1000;
this.RETRY_INTERVAL_MS = 10000; // 10 seconds
debugDocstore(`Initializing DocStoreSQLite with database at: ${dbPath}`);
this.db = new DatabaseSync(dbPath);
this.initDatabase();
}
/**
* Initialize the database by creating required tables if they don't exist
*/
initDatabase() {
// Allow concurrent readers
this.db.exec('PRAGMA journal_mode = WAL;');
// Wait up to 3 seconds for locks
this.db.exec('PRAGMA busy_timeout = 3000;');
// Create docstores table with new fields for embeddings model
this.db.exec(`
CREATE TABLE IF NOT EXISTS docstores (
id TEXT PRIMARY KEY,
name TEXT NOT NULL,
timestamp INTEGER NOT NULL,
model TEXT,
vector_size INTEGER,
options TEXT
)
`);
// Create docs table with auto-incremented aid field and BLOB for embeddings
this.db.exec(`
CREATE TABLE IF NOT EXISTS docs (
aid INTEGER PRIMARY KEY AUTOINCREMENT,
id TEXT NOT NULL,
docstore_id TEXT NOT NULL,
timestamp INTEGER NOT NULL,
created_at INTEGER NOT NULL,
type TEXT NOT NULL,
data TEXT NOT NULL,
embeddings BLOB,
UNIQUE(docstore_id, id)
)
`);
// Create indexes for better query performance
this.db.exec("CREATE INDEX IF NOT EXISTS idx_docs_docstore_id ON docs (docstore_id)");
this.db.exec("CREATE INDEX IF NOT EXISTS idx_docs_timestamp ON docs (timestamp)");
this.db.exec("CREATE INDEX IF NOT EXISTS idx_docs_type ON docs (type)");
// Migration: Add user_id column to docstores table if it doesn't exist
try {
// Check if user_id column exists in docstores table
const docstoreColumns = this.db.prepare("PRAGMA table_info(docstores)").all();
const hasUserIdColumn = docstoreColumns.some((col) => col.name === 'user_id');
if (!hasUserIdColumn) {
debugDocstore("Adding user_id column to docstores table");
this.db.exec("ALTER TABLE docstores ADD COLUMN user_id TEXT NOT NULL DEFAULT ''");
this.db.exec("CREATE INDEX IF NOT EXISTS idx_docstores_user_id ON docstores (user_id)");
}
}
catch (error) {
debugError("Error adding user_id column to docstores table:", error);
}
// Migration: Add user_id column to docs table if it doesn't exist
try {
// Check if user_id column exists in docs table
const docsColumns = this.db.prepare("PRAGMA table_info(docs)").all();
const hasUserIdColumn = docsColumns.some((col) => col.name === 'user_id');
if (!hasUserIdColumn) {
debugDocstore("Adding user_id column to docs table");
this.db.exec("ALTER TABLE docs ADD COLUMN user_id TEXT NOT NULL DEFAULT ''");
this.db.exec("CREATE INDEX IF NOT EXISTS idx_docs_user_id ON docs (user_id)");
}
}
catch (error) {
debugError("Error adding user_id column to docs table:", error);
}
}
/**
* Subscribe to documents in a docstore with batched queries
* @param docstore_id - ID of the docstore to subscribe to
* @param type - Type of documents to filter by
* @param since - Start timestamp for filtering documents
* @param until - End timestamp for filtering documents
* @param onDoc - Callback function to handle each document
* @returns Subscription object to manage the subscription
*/
async subscribe(options, onDoc) {
let isActive = true;
let pauseTimeout;
let lastAid = 0; // Use aid for pagination instead of id
const { docstore_id, type, since, until, user_id } = options;
debugDocstore(`Subscribing to docstore: ${docstore_id}, type: ${type || 'all'}, since: ${since || 'beginning'}, until: ${until || 'now'}, user_id: ${user_id || 'all'}`);
// Function to convert row to Doc interface
const rowToDoc = (row) => {
// Convert embeddings from BLOB to Float32Array[]
let embeddingsArray = [];
if (row.embeddings) {
embeddingsArray = this.blobToFloat32Arrays(row.embeddings, row.docstore_id.toString());
}
return {
id: row.id?.toString() || "",
docstore_id: row.docstore_id?.toString() || "",
timestamp: Number(row.timestamp || 0),
created_at: Number(row.created_at || 0),
type: row.type?.toString() || "",
data: row.data?.toString() || "",
embeddings: embeddingsArray,
user_id: row.user_id?.toString() || "",
// aid is not included in the returned Doc object as it's an internal implementation detail
};
};
// Function to fetch a batch of documents
const fetchBatch = async () => {
if (!isActive)
return;
// Build query dynamically based on provided filters
let query = `
SELECT * FROM docs
WHERE docstore_id = ?
`;
const queryParams = [docstore_id];
// Add user_id filter if defined
if (user_id !== undefined) {
query += ` AND user_id = ?`;
queryParams.push(user_id);
}
// Add type filter if defined
if (type !== undefined) {
query += ` AND type = ?`;
queryParams.push(type);
}
// Add since filter if defined
if (since !== undefined) {
query += ` AND timestamp >= ?`;
queryParams.push(since);
}
// Add until filter if defined
if (until !== undefined) {
query += ` AND timestamp <= ?`;
queryParams.push(until);
}
// Use aid for pagination instead of id
query += `
AND aid > ?
ORDER BY aid ASC
LIMIT ?
`;
queryParams.push(lastAid, this.BATCH_SIZE);
const stmt = this.db.prepare(query);
const rows = stmt.all(...queryParams);
// Process the batch sequentially to respect backpressure
for (let i = 0; i < rows.length; i++) {
if (!isActive)
return;
const row = rows[i];
const doc = rowToDoc(row);
await onDoc(doc);
// Ensure lastAid is always a number
lastAid = row.aid ? Number(row.aid) : 0;
}
// onDoc might close the sub
// Not full batch returned, signal EOF
if (isActive && rows.length < this.BATCH_SIZE)
await onDoc(undefined);
// Schedule next batch
if (isActive) {
// If we got a partial batch, wait before checking for new docs
if (rows.length < this.BATCH_SIZE) {
pauseTimeout = setTimeout(() => fetchBatch(), this.RETRY_INTERVAL_MS);
}
else {
// If we got a full batch, immediately fetch the next batch.
// Use setImmediate to avoid stack overflow with large result sets
setImmediate(() => fetchBatch());
}
}
};
// Start fetching documents asynchronously to let this
// function return the subscription which might be
// accessed in onDoc callback
setImmediate(async () => {
try {
await fetchBatch();
}
catch (err) {
debugError("Error in DocStoreSQLite subscription:", err);
}
});
// Create subscription object with close method
const subscription = {
close: () => {
isActive = false;
if (pauseTimeout)
clearTimeout(pauseTimeout);
},
};
// Return Promise that resolves with the subscription
return Promise.resolve(subscription);
}
/**
* Upsert a document in the store using a single atomic operation
* @param doc - Document to upsert
*/
/**
* Convert Float32Array[] to a single Uint8Array for storage
* @param embeddings - Array of Float32Array embeddings
* @param vectorSize - Size of each embedding vector
* @returns Uint8Array containing all embeddings
*/
float32ArraysToBlob(embeddings, vector_size) {
// Check that we don't exceed the maximum number of vectors (2^16)
if (embeddings.length >= (1 << 16)) {
throw new Error(`Too many embeddings: ${embeddings.length}, maximum is ${(1 << 16) - 1}`);
}
// Check that each embedding has the correct size
for (let i = 0; i < embeddings.length; i++) {
if (embeddings[i].length !== vector_size) {
throw new Error(`Embedding at index ${i} has incorrect size: ${embeddings[i].length}, expected ${vector_size}`);
}
}
// Calculate total size: 2 bytes for count + (vector_size * 4 bytes per float32) * number of embeddings
const totalSize = 2 + (vector_size * 4 * embeddings.length);
const result = new Uint8Array(totalSize);
// Write number of vectors as uint16 (2 bytes)
result[0] = embeddings.length & 0xFF;
result[1] = (embeddings.length >> 8) & 0xFF;
// Write each embedding
let offset = 2;
for (const embedding of embeddings) {
const byteArray = new Uint8Array(embedding.buffer);
result.set(byteArray, offset);
offset += byteArray.length;
}
return result;
}
/**
* Convert a Uint8Array blob back to an array of Float32Array embeddings
* @param blob - Uint8Array blob containing embeddings
* @param docstore_id - ID of the docstore the blob belongs to
* @returns Array of Float32Array embeddings
*/
blobToFloat32Arrays(blob, docstore_id) {
// Get the docstore to determine vector_size
const docstore = this.getDocstoreSync(docstore_id);
if (!docstore || !docstore.vector_size) {
return [];
}
const vector_size = docstore.vector_size;
// Validate blob size - ensure it has at least 2 bytes
if (!blob || blob.length < 2) {
debugError(`Invalid blob size: ${blob ? blob.length : 'null'}, expected at least 2 bytes`);
return [];
}
// Parse the blob
// First 2 bytes are the count of vectors
const count = blob[0] | (blob[1] << 8);
// Each vector is vector_size * 4 bytes (4 bytes per float32)
const bytesPerVector = vector_size * 4;
// Calculate expected blob size
const expectedSize = 2 + (count * bytesPerVector);
// Validate blob size - ensure it has the expected size
if (blob.length !== expectedSize) {
debugError(`Invalid blob size: ${blob.length}, expected ${expectedSize} bytes for ${count} vectors of size ${vector_size}`);
return [];
}
// Create an array to hold the embeddings
const embeddings = [];
// Extract each embedding
let offset = 2;
for (let i = 0; i < count; i++) {
// Create a view into the blob for this embedding using subarray
const vectorBytes = blob.subarray(offset, offset + bytesPerVector);
// Convert to Float32Array
const embedding = new Float32Array(vectorBytes.buffer.slice(vectorBytes.byteOffset, vectorBytes.byteOffset + vectorBytes.byteLength));
embeddings.push(embedding);
offset += bytesPerVector;
}
return embeddings;
}
// This method is no longer needed as we pass docstore_id directly to blobToFloat32Arrays
/**
* Get a docstore by ID
* @param id - ID of the docstore to get
* @returns The docstore if found, null otherwise
*/
getDocstoreSync(id, user_id) {
let query = "SELECT * FROM docstores WHERE id = ?";
const params = [id];
// Add user_id filter if provided
if (user_id !== undefined) {
query += " AND user_id = ?";
params.push(user_id);
}
const stmt = this.db.prepare(query);
const row = stmt.get(...params);
if (!row) {
return undefined;
}
return {
id: String(row.id || ""),
name: String(row.name || ""),
timestamp: Number(row.timestamp || 0),
model: String(row.model || ""),
vector_size: Number(row.vector_size || 0),
options: String(row.options || ""),
user_id: String(row.user_id || "")
};
}
async getDocstore(id, user_id) {
return Promise.resolve(this.getDocstoreSync(id, user_id));
}
async upsert(doc, user_id) {
debugDocstore(`Upserting document: ${doc.id} in docstore: ${doc.docstore_id}, type: ${doc.type}, user_id: ${user_id || doc.user_id || 'none'}`);
if (user_id && doc.user_id !== user_id)
throw new Error("Wrong doc user");
// Get the docstore to check vector_size
const docstore = this.getDocstoreSync(doc.docstore_id);
if (!docstore) {
throw new Error(`Docstore not found: ${doc.docstore_id}`);
}
if (!docstore.vector_size) {
throw new Error(`Docstore ${doc.docstore_id} has no vector_size defined`);
}
// Convert embeddings to blob if present
let embeddingsBlob = null;
if (doc.embeddings && doc.embeddings.length > 0) {
embeddingsBlob = this.float32ArraysToBlob(doc.embeddings, docstore.vector_size);
}
// Use INSERT OR REPLACE to handle both insert and update in a single atomic operation
const stmt = this.db.prepare(`
INSERT OR REPLACE INTO docs (
id, docstore_id, timestamp, created_at, type, data, embeddings, user_id
) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
`);
stmt.run(doc.id, doc.docstore_id, doc.timestamp, doc.created_at, doc.type, doc.data, embeddingsBlob, user_id || doc.user_id || "");
return Promise.resolve();
}
/**
* Get a document by ID
* @param docstore_id - ID of the docstore containing the document
* @param doc_id - ID of the document to get
* @returns The document if found, null otherwise
*/
async get(docstore_id, doc_id, user_id) {
let query = "SELECT * FROM docs WHERE docstore_id = ? AND id = ?";
const params = [docstore_id, doc_id];
// Add user_id filter if provided
if (user_id !== undefined) {
query += " AND user_id = ?";
params.push(user_id);
}
const stmt = this.db.prepare(query);
const row = stmt.get(...params);
if (!row) {
return Promise.resolve(null);
}
// Convert embeddings from BLOB to Float32Array[]
let embeddingsArray = [];
if (row.embeddings) {
embeddingsArray = this.blobToFloat32Arrays(row.embeddings, docstore_id);
}
// Convert row to Doc interface
const doc = {
id: row.id?.toString() || "",
docstore_id: row.docstore_id?.toString() || "",
timestamp: Number(row.timestamp || 0),
created_at: Number(row.created_at || 0),
type: row.type?.toString() || "",
data: row.data?.toString() || "",
embeddings: embeddingsArray,
user_id: row.user_id?.toString() || "",
};
return Promise.resolve(doc);
}
/**
* Delete a document from the store
* @param docstore_id - ID of the docstore containing the document
* @param doc_id - ID of the document to delete
* @returns true if document existed and was deleted, false otherwise
*/
async delete(docstore_id, doc_id, user_id) {
let query = "DELETE FROM docs WHERE docstore_id = ? AND id = ?";
const params = [docstore_id, doc_id];
// Add user_id filter if provided
if (user_id !== undefined) {
query += " AND user_id = ?";
params.push(user_id);
}
const stmt = this.db.prepare(query);
const result = stmt.run(...params);
// Return true if a row was affected (document was deleted)
return Promise.resolve(result.changes > 0);
}
/**
* Create a new docstore if one with the given name doesn't exist
* @param name - Name of the docstore to create
* @param model - Name of the embeddings model
* @param vector_size - Size of embedding vectors
* @param options - Options for the model, defaults to empty string
* @returns Promise that resolves with the ID of the created or existing docstore
*/
async createDocstore(name, model = "", vector_size = 0, options = "", user_id) {
debugDocstore(`Creating docstore with name: ${name}, model: ${model}, vector_size: ${vector_size}, user_id: ${user_id || 'none'}`);
// Check if docstore with this name already exists
let query = "SELECT id FROM docstores WHERE name = ?";
const params = [name];
// Add user_id filter if provided
if (user_id !== undefined) {
query += " AND user_id = ?";
params.push(user_id);
}
const existingDocstore = this.db
.prepare(query)
.get(...params);
if (existingDocstore && existingDocstore.id !== null) {
debugDocstore(`Docstore with name ${name} already exists with ID: ${existingDocstore.id}`);
return Promise.resolve(existingDocstore.id.toString());
}
// Create new docstore with UUID
const timestamp = Math.floor(Date.now() / 1000);
const id = crypto.randomUUID();
const stmt = this.db.prepare("INSERT INTO docstores (id, name, timestamp, model, vector_size, options, user_id) VALUES (?, ?, ?, ?, ?, ?, ?)");
stmt.run(id, name, timestamp, model, vector_size, options, user_id || "");
debugDocstore(`Created new docstore with name: ${name}, ID: ${id}, model: ${model}, vector_size: ${vector_size}`);
return Promise.resolve(id);
}
/**
* List all docstores
* @returns Promise that resolves with an array of docstore objects
*/
async listDocstores(user_id) {
let query = "SELECT * FROM docstores";
const params = [];
// Add user_id filter if provided
if (user_id !== undefined) {
query += " WHERE user_id = ?";
params.push(user_id);
}
query += " ORDER BY id ASC";
const stmt = this.db.prepare(query);
const rows = stmt.all(...params);
const docstores = rows.map((row) => ({
id: String(row.id || ""),
name: String(row.name || ""),
timestamp: Number(row.timestamp || 0),
model: String(row.model || ""),
vector_size: Number(row.vector_size || 0),
options: String(row.options || ""),
user_id: String(row.user_id || ""),
}));
return Promise.resolve(docstores);
}
/**
* List docstores by specific IDs
* @param ids - Array of docstore IDs to retrieve
* @returns Promise that resolves with an array of docstore objects
*/
async listDocStoresByIds(ids, user_id) {
if (ids.length === 0) {
return Promise.resolve([]);
}
// Create placeholders for the IN clause
const placeholders = ids.map(() => '?').join(',');
let query = `SELECT * FROM docstores WHERE id IN (${placeholders})`;
const params = [...ids];
// Add user_id filter if provided
if (user_id !== undefined) {
query += " AND user_id = ?";
params.push(user_id);
}
query += " ORDER BY id ASC";
const stmt = this.db.prepare(query);
const rows = stmt.all(...params);
const docstores = rows.map((row) => ({
id: String(row.id || ""),
name: String(row.name || ""),
timestamp: Number(row.timestamp || 0),
model: String(row.model || ""),
vector_size: Number(row.vector_size || 0),
options: String(row.options || ""),
user_id: String(row.user_id || ""),
}));
return Promise.resolve(docstores);
}
/**
* List documents by specific IDs
* @param docstore_id - ID of the docstore containing the documents
* @param ids - Array of document IDs to retrieve
* @returns Promise that resolves with an array of document objects
*/
async listDocsByIds(docstore_id, ids) {
if (ids.length === 0) {
return Promise.resolve([]);
}
// Create placeholders for the IN clause
const placeholders = ids.map(() => '?').join(',');
// Prepare the query with docstore_id and the list of document IDs
const query = `SELECT * FROM docs WHERE docstore_id = ? AND id IN (${placeholders}) ORDER BY id ASC`;
// Create the parameter array with docstore_id as the first parameter
const params = [docstore_id, ...ids];
const stmt = this.db.prepare(query);
const rows = stmt.all(...params);
// Convert rows to Doc objects
const docs = rows.map((row) => {
// Convert embeddings from BLOB to Float32Array[]
let embeddingsArray = [];
if (row.embeddings) {
embeddingsArray = this.blobToFloat32Arrays(row.embeddings, docstore_id);
}
return {
id: row.id?.toString() || "",
docstore_id: row.docstore_id?.toString() || "",
timestamp: Number(row.timestamp || 0),
created_at: Number(row.created_at || 0),
type: row.type?.toString() || "",
data: row.data?.toString() || "",
embeddings: embeddingsArray,
user_id: row.user_id?.toString() || "",
};
});
return Promise.resolve(docs);
}
/**
* Delete a docstore and all its documents
* @param id - ID of the docstore to delete
* @returns Promise that resolves with true if docstore existed and was deleted, false otherwise
*/
async deleteDocstore(id, user_id) {
// Use a transaction to ensure atomicity
this.db.exec("BEGIN TRANSACTION");
try {
// Delete all documents in the docstore
let docsQuery = "DELETE FROM docs WHERE docstore_id = ?";
const docsParams = [id];
// Add user_id filter if provided
if (user_id !== undefined) {
docsQuery += " AND user_id = ?";
docsParams.push(user_id);
}
const docsStmt = this.db.prepare(docsQuery);
docsStmt.run(...docsParams);
// Delete the docstore
let docstoreQuery = "DELETE FROM docstores WHERE id = ?";
const docstoreParams = [id];
// Add user_id filter if provided
if (user_id !== undefined) {
docstoreQuery += " AND user_id = ?";
docstoreParams.push(user_id);
}
const docstoreStmt = this.db.prepare(docstoreQuery);
const result = docstoreStmt.run(...docstoreParams);
this.db.exec("COMMIT");
// Return true if a docstore was deleted
return Promise.resolve(result.changes > 0);
}
catch (error) {
this.db.exec("ROLLBACK");
debugError("Error deleting docstore:", error);
return Promise.resolve(false);
}
}
/**
* Count documents in a docstore
* @param docstore_id - ID of the docstore to count documents for
* @returns Promise that resolves with the number of documents in the docstore
*/
async countDocs(docstore_id, user_id) {
let query = "SELECT COUNT(*) as count FROM docs WHERE docstore_id = ?";
const params = [docstore_id];
// Add user_id filter if provided
if (user_id !== undefined) {
query += " AND user_id = ?";
params.push(user_id);
}
const stmt = this.db.prepare(query);
const result = stmt.get(...params);
return Promise.resolve(result && typeof result.count === "number" ? result.count : 0);
}
/**
* Symbol.dispose method for releasing resources
*/
[Symbol.dispose]() {
this.db.close();
}
}
//# sourceMappingURL=DocStoreSQLite.js.map