@storecraft/database-turso
Version:
`Storecraft` database driver for `Turso` (cloud sqlite)
332 lines (275 loc) • 8.25 kB
JavaScript
/**
* @import {
* AIEmbedder, VectorStore
* } from '@storecraft/core/ai/core/types.private.js'
* @import { ENV } from '@storecraft/core';
* @import {
* Config
* } from './types.js'
* @import {
* VectorDocumentUpsert
* } from './types.private.js'
* @import { InArgs } from '@libsql/client';
*/
import * as libsql from "@libsql/client";
import {
truncate_or_pad_vector
} from "@storecraft/core/ai/models/vector-stores/index.js";
/**
 * Default value for `config.index_name`, applied by the
 * `LibSQLVectorStore` constructor when the caller does not supply one.
 * The backing table name is derived from it as `<index_name>_table`.
 */
export const DEFAULT_INDEX_NAME = 'vector_store';
/**
 * Best-effort JSON parsing that never throws and never yields a nullish
 * value.
 *
 * @param {any} json text to parse (typically a TEXT column value).
 * @returns {any} the parsed value, or `{}` when the input is not valid
 * JSON or parses to `null` (e.g. a NULL column or the literal `'null'`).
 */
const parse_json_safely = json => {
  try {
    // `JSON.parse(null)` and `JSON.parse('null')` both succeed with `null`;
    // fall back to `{}` so callers get the same shape as on a parse error.
    return JSON.parse(json) ?? {};
  } catch (e) {
    // Deliberately swallowed: malformed input degrades to an empty object.
    return {};
  }
}
/**
* Implementation references:
* - https://docs.turso.tech/features/ai-and-embeddings#vectors-usage
* - https://github.com/langchain-ai/langchainjs/blob/9dfaae7e36a1ddce586b9c44fb96785fa38b36ec/libs/langchain-community/src/vectorstores/libsql.ts
*/
/**
* @typedef {VectorStore} Impl
*/
/**
 * @description LibSQL / Turso Vector Store.
 *
 * Documents and their embeddings live in one table named
 * `<index_name>_table`; similarity queries go through a libsql vector
 * index named `<index_name>` built on the `embedding` column.
 *
 * @implements {VectorStore}
 */
export class LibSQLVectorStore {

  /**
   * Names of the environment variables consulted by `onInit` for the
   * matching config keys.
   *
   * @satisfies {ENV<Config>}
   */
  static EnvConfig = /** @type {const} */ ({
    authToken: 'LIBSQL_VECTOR_AUTH_TOKEN',
    url: 'LIBSQL_VECTOR_URL',
  });

  /** @type {Config} */
  config;

  /**
   * Lazily created by the `client` getter.
   * @type {libsql.Client}
   */
  #client

  /**
   * Caller-supplied values override the defaults
   * (`index_name`, `similarity: 'cosine'`, `dimensions: 1536`).
   *
   * @param {Config} config
   */
  constructor(config) {
    this.config = {
      index_name: DEFAULT_INDEX_NAME,
      similarity: 'cosine',
      dimensions: 1536,
      ...config,
    };
  }

  /** @type {VectorStore["metric"]} */
  get metric() {
    return this.config.similarity;
  };

  /** @type {VectorStore["dimensions"]} */
  get dimensions() {
    return this.config.dimensions;
  };

  /**
   * The libsql client, created from `this.config` on first access and
   * reused afterwards.
   *
   * @throws {Error} when `config.url` is not set.
   */
  get client() {
    if(!this.config.url) {
      throw new Error('LibSQLVectorStore::client() - missing url');
    }

    // @ts-ignore
    this.#client ??= libsql.createClient(this.config);

    return this.#client;
  }

  /** Name of the vector index. */
  get index_name() {
    return this.config.index_name;
  }

  /** Name of the table holding documents and embeddings. */
  get table_name() {
    return `${this.index_name}_table`;
  }

  /**
   * Fills `authToken` / `url` from the app environment when they were not
   * given explicitly. The vector-specific variables win over the generic
   * `LIBSQL_*` ones; `url` finally falls back to a local `file:data.db`.
   *
   * @type {VectorStore["onInit"]}
   */
  onInit = (app) => {
    this.config.authToken ??=
      app.env[LibSQLVectorStore.EnvConfig.authToken]
      ?? app.env['LIBSQL_AUTH_TOKEN'];

    this.config.url ??=
      app.env[LibSQLVectorStore.EnvConfig.url]
      ?? app.env['LIBSQL_URL'] ?? 'file:data.db';
  }

  /** @type {VectorStore["embedder"]} */
  get embedder() {
    return this.config.embedder
  }

  // Table columns:
  // (id TEXT, metadata TEXT, pageContent Text, updated_at TEXT, namespace TEXT, embedding F32_BLOB

  /**
   * Writes documents with pre-computed vectors. `vectors[i]` belongs to
   * `documents[i]`. Each document is written as DELETE-by-id followed by
   * INSERT, all sent in a single batch. Does nothing for an empty
   * `documents` array.
   *
   * @type {VectorStore["upsertVectors"]}
   */
  upsertVectors = async (vectors, documents, options) => {
    if(documents.length === 0) {
      // nothing to write, skip the round trip
      return;
    }

    const updated_at = new Date().toISOString();
    const table = this.table_name;
    const dimensions = this.config.dimensions;

    /** @type {VectorDocumentUpsert[]} */
    const docs_upsert = documents.map(
      (doc, ix) => (
        {
          // vectors are passed as a `[a,b,c]` text literal to `vector(...)`
          embedding: `[${truncate_or_pad_vector(vectors[ix], dimensions).join(',')}]`,
          id: doc.id,
          metadata: JSON.stringify(doc.metadata ?? {}),
          pageContent: doc.pageContent,
          updated_at,
          namespace: doc.namespace,
        }
      )
    );

    /** @type {import("@libsql/client").InStatement[]} */
    const stmts_delete = docs_upsert.map(
      (doc) => (
        {
          sql: `DELETE FROM ${table} WHERE id=?`,
          args: [doc.id]
        }
      )
    );

    /** @type {import("@libsql/client").InStatement[]} */
    const stmts_insert = docs_upsert.map(
      (doc) => (
        {
          sql: `
          INSERT INTO ${table} (id, metadata, pageContent, updated_at, namespace, embedding)
          VALUES (:id, :metadata, :pageContent, :updated_at, :namespace, vector(:embedding))
          `,
          // `undefined` is not a valid libsql bind value; store a missing
          // namespace as SQL NULL instead.
          args: { ...doc, namespace: doc.namespace ?? null }
        }
      )
    );

    await this.client.batch(
      [
        ...stmts_delete,
        ...stmts_insert,
      ]
    );
  }

  /**
   * Embeds each document's `pageContent` with the configured embedder and
   * delegates to `upsertVectors`. Logs a warning and returns without
   * writing when the embedder yields no result.
   *
   * @type {VectorStore["upsertDocuments"]}
   */
  upsertDocuments = async (documents, options) => {
    // first, generate embeddings for the documents
    const result = await this.embedder.generateEmbeddings(
      {
        content: documents.map(
          doc => (
            {
              content: doc.pageContent,
              type: 'text'
            }
          )
        )
      }
    );

    if(!result) {
      console.warn(
        'LibSQLVectorStore::upsertDocuments() - no result from embedder'
      );
      return;
    }

    return this.upsertVectors(
      result.content, documents, options
    )
  }

  /**
   * Deletes the rows whose `id` is in `ids`. Does nothing for an empty
   * array.
   *
   * @type {VectorStore["delete"]}
   */
  delete = async (ids) => {
    if(ids.length === 0) {
      // an empty IN-list matches nothing, skip the round trip
      return;
    }

    await this.client.execute(
      {
        sql: `DELETE FROM ${this.table_name} WHERE id IN (${ids.map(() => '?').join(',')})`,
        args: ids
      }
    );
  }

  /**
   * Embeds `query`, fetches the `k` nearest rows through the vector index
   * and returns them ordered by ascending distance. When `namespaces` is a
   * non-empty array the rows returned by the index are additionally
   * filtered by namespace, so fewer than `k` results may come back.
   * Logs a warning and returns `[]` when the embedder yields no vector.
   *
   * @type {VectorStore["similaritySearch"]}
   */
  similaritySearch = async (query, k, namespaces) => {
    const embedding_result = await this.embedder.generateEmbeddings(
      {
        content: [
          {
            content: query,
            type: 'text'
          }
        ]
      }
    );

    const query_embedding = embedding_result?.content?.[0];

    if(!query_embedding) {
      console.warn(
        'LibSQLVectorStore::similaritySearch() - no result from embedder'
      );
      return [];
    }

    const vector = truncate_or_pad_vector(
      query_embedding, this.config.dimensions
    );
    const vector_sql_value = `[${vector.join(',')}]`;
    const is_cosine = this.config.similarity==='cosine';
    const distance_fn = is_cosine ? 'vector_distance_cos' : 'vector_distance_l2';

    // Query shape follows the Turso example:
    // SELECT title, year
    // FROM vector_top_k('movies_idx', vector32('[0.064, 0.777, 0.661, 0.687]'), 3)
    // JOIN movies ON movies.rowid = id
    // WHERE year >= 2020;
    const table = this.table_name;
    const index_name = this.index_name;

    // positional args, pushed in the same order as the `?` placeholders
    /** @type {InArgs} */
    const args = [vector_sql_value, vector_sql_value, k];

    let sql = `
    SELECT ${table}.id, metadata, pageContent, updated_at, namespace, ${distance_fn}(embedding, vector(?)) AS score
    FROM vector_top_k('${index_name}', vector(?), CAST(? AS INTEGER)) as top_k_view
    JOIN ${table} ON ${table}.rowid = top_k_view.rowid
    `;

    if(Array.isArray(namespaces) && namespaces.length) {
      sql += `\nWHERE namespace IN (${namespaces.map(() => '?').join(',')})`
      args.push(...namespaces);
    }

    sql += `
    ORDER BY
    ${distance_fn}(embedding, vector(?))
    ASC;
    `
    args.push(vector_sql_value);

    const result = await this.client.execute({ sql, args });

    return result.rows.map(
      (row) => (
        {
          document: {
            pageContent: String(row.pageContent),
            id: String(row.id),
            metadata: parse_json_safely(row.metadata),
            // a NULL namespace must not become the string "null"
            namespace: row.namespace == null ? undefined : String(row.namespace),
          },
          // `libsql` score is (1 - Cosine Similarity) which yields a distance
          // between [0, 2] where 0 is the most similar.
          // This is not in accordance with other apis, so we invert it to [-1, 1].
          // For any other metric the raw distance is returned unchanged.
          score: is_cosine ? (1.0 - Number(row.score)) : Number(row.score)
        }
      )
    );
  }

  /**
   * Creates the backing table and the vector index if they do not exist.
   *
   * @param {Record<string, any>} [params={}] currently unused.
   * @param {boolean} [delete_index_if_exists_before=false] drop the
   * existing index and table first.
   * @returns {Promise<boolean>} always `true` once the batch succeeded.
   */
  createVectorIndex = async (params={}, delete_index_if_exists_before=false) => {
    if(delete_index_if_exists_before) {
      await this.deleteVectorIndex();
    }

    // The index metric defaults to cosine. `similaritySearch` ranks with
    // `vector_distance_l2` for every non-cosine config, so build the index
    // with the matching metric in that case.
    const index_settings = this.config.similarity==='cosine' ? '' : `, 'metric=l2'`;

    await this.client.batch(
      [
        `CREATE TABLE IF NOT EXISTS ${this.table_name} (id TEXT, metadata TEXT, pageContent Text, updated_at TEXT, namespace TEXT, embedding F32_BLOB(${this.config.dimensions}));`,
        `CREATE INDEX IF NOT EXISTS ${this.index_name} ON ${this.table_name}(libsql_vector_idx(embedding${index_settings}));`
      ]
    );

    return true;
  }

  /**
   * Drops the vector index and its backing table if they exist.
   *
   * @returns {Promise<boolean>} always `true` once the batch succeeded.
   */
  deleteVectorIndex = async () => {
    await this.client.batch(
      [
        `DROP INDEX IF EXISTS ${this.index_name}`,
        `DROP TABLE IF EXISTS ${this.table_name}`,
      ]
    );

    return true;
  }
}