@mastra/core

The core foundation of the Mastra framework, providing essential components and interfaces for building AI-powered applications.

740 lines (735 loc), 24.8 kB — the bundled LibSQL vector store module (src/vector/libsql), containing the filter translator, the SQL filter builder, and the exported LibSQLVector class.
import { BaseFilterTranslator } from './chunk-LGERQTJM.js'; import { MastraVector } from './chunk-LXIAHZ45.js'; import { isAbsolute, join, resolve } from 'path'; import { createClient } from '@libsql/client'; // src/vector/libsql/filter.ts var LibSQLFilterTranslator = class extends BaseFilterTranslator { getSupportedOperators() { return { ...BaseFilterTranslator.DEFAULT_OPERATORS, regex: [], custom: ["$contains", "$size"] }; } translate(filter) { if (this.isEmpty(filter)) { return filter; } this.validateFilter(filter); return this.translateNode(filter); } translateNode(node, currentPath = "") { if (this.isRegex(node)) { throw new Error("Direct regex pattern format is not supported in LibSQL"); } const withPath = (result2) => currentPath ? { [currentPath]: result2 } : result2; if (this.isPrimitive(node)) { return withPath({ $eq: this.normalizeComparisonValue(node) }); } if (Array.isArray(node)) { return withPath({ $in: this.normalizeArrayValues(node) }); } const entries = Object.entries(node); const result = {}; for (const [key, value] of entries) { const newPath = currentPath ? `${currentPath}.${key}` : key; if (this.isLogicalOperator(key)) { result[key] = Array.isArray(value) ? value.map((filter) => this.translateNode(filter)) : this.translateNode(value); } else if (this.isOperator(key)) { if (this.isArrayOperator(key) && !Array.isArray(value) && key !== "$elemMatch") { result[key] = [value]; } else if (this.isBasicOperator(key) && Array.isArray(value)) { result[key] = JSON.stringify(value); } else { result[key] = value; } } else if (typeof value === "object" && value !== null) { const hasOperators = Object.keys(value).some((k) => this.isOperator(k)); if (hasOperators) { result[newPath] = this.translateNode(value); } else { Object.assign(result, this.translateNode(value, newPath)); } } else { result[newPath] = this.translateNode(value); } } return result; } // TODO: Look more into regex support for LibSQL // private translateRegexPattern(pattern: string, options: string = ''): any { // if (!options) return { $regex: pattern }; // const flags = options // .split('') // .filter(f => 'imsux'.includes(f)) // .join(''); // return { // $regex: pattern, // $options: flags, // }; // } }; // src/vector/libsql/sql-builder.ts var createBasicOperator = (symbol) => { return (key, value) => ({ sql: `CASE WHEN ? IS NULL THEN json_extract(metadata, '$."${handleKey(key)}"') IS ${symbol === "=" ? "" : "NOT"} NULL ELSE json_extract(metadata, '$."${handleKey(key)}"') ${symbol} ? 
END`, needsValue: true, transformValue: () => [value, value] }); }; var createNumericOperator = (symbol) => { return (key) => ({ sql: `CAST(json_extract(metadata, '$."${handleKey(key)}"') AS NUMERIC) ${symbol} ?`, needsValue: true }); }; function buildElemMatchConditions(value) { const conditions = Object.entries(value).map(([field, fieldValue]) => { if (field.startsWith("$")) { const { sql, values } = buildCondition("elem.value", { [field]: fieldValue }); const pattern = /json_extract\(metadata, '\$\."[^"]*"(\."[^"]*")*'\)/g; const elemSql = sql.replace(pattern, "elem.value"); return { sql: elemSql, values }; } else if (typeof fieldValue === "object" && !Array.isArray(fieldValue)) { const { sql, values } = buildCondition(field, fieldValue); const pattern = /json_extract\(metadata, '\$\."[^"]*"(\."[^"]*")*'\)/g; const elemSql = sql.replace(pattern, `json_extract(elem.value, '$."${field}"')`); return { sql: elemSql, values }; } else { return { sql: `json_extract(elem.value, '$."${field}"') = ?`, values: [fieldValue] }; } }); return conditions; } var validateJsonArray = (key) => `json_valid(json_extract(metadata, '$."${handleKey(key)}"')) AND json_type(json_extract(metadata, '$."${handleKey(key)}"')) = 'array'`; var FILTER_OPERATORS = { $eq: createBasicOperator("="), $ne: createBasicOperator("!="), $gt: createNumericOperator(">"), $gte: createNumericOperator(">="), $lt: createNumericOperator("<"), $lte: createNumericOperator("<="), // Array Operators $in: (key, value) => { const arr = Array.isArray(value) ? value : [value]; if (arr.length === 0) { return { sql: "1 = 0", needsValue: true, transformValue: () => [] }; } const paramPlaceholders = arr.map(() => "?").join(","); return { sql: `( CASE WHEN ${validateJsonArray(key)} THEN EXISTS ( SELECT 1 FROM json_each(json_extract(metadata, '$."${handleKey(key)}"')) as elem WHERE elem.value IN (SELECT value FROM json_each(?)) ) ELSE json_extract(metadata, '$."${handleKey(key)}"') IN (${paramPlaceholders}) END )`, needsValue: true, transformValue: () => [JSON.stringify(arr), ...arr] }; }, $nin: (key, value) => { const arr = Array.isArray(value) ? value : [value]; if (arr.length === 0) { return { sql: "1 = 1", needsValue: true, transformValue: () => [] }; } const paramPlaceholders = arr.map(() => "?").join(","); return { sql: `( CASE WHEN ${validateJsonArray(key)} THEN NOT EXISTS ( SELECT 1 FROM json_each(json_extract(metadata, '$."${handleKey(key)}"')) as elem WHERE elem.value IN (SELECT value FROM json_each(?)) ) ELSE json_extract(metadata, '$."${handleKey(key)}"') NOT IN (${paramPlaceholders}) END )`, needsValue: true, transformValue: () => [JSON.stringify(arr), ...arr] }; }, $all: (key, value) => { let sql; const arrayValue = Array.isArray(value) ? value : [value]; if (arrayValue.length === 0) { sql = "1 = 0"; } else { sql = `( CASE WHEN ${validateJsonArray(key)} THEN NOT EXISTS ( SELECT value FROM json_each(?) 
WHERE value NOT IN ( SELECT value FROM json_each(json_extract(metadata, '$."${handleKey(key)}"')) ) ) ELSE FALSE END )`; } return { sql, needsValue: true, transformValue: () => { if (arrayValue.length === 0) { return []; } return [JSON.stringify(arrayValue)]; } }; }, $elemMatch: (key, value) => { if (typeof value !== "object" || Array.isArray(value)) { throw new Error("$elemMatch requires an object with conditions"); } const conditions = buildElemMatchConditions(value); return { sql: `( CASE WHEN ${validateJsonArray(key)} THEN EXISTS ( SELECT 1 FROM json_each(json_extract(metadata, '$."${handleKey(key)}"')) as elem WHERE ${conditions.map((c) => c.sql).join(" AND ")} ) ELSE FALSE END )`, needsValue: true, transformValue: () => conditions.flatMap((c) => c.values) }; }, // Element Operators $exists: (key) => ({ sql: `json_extract(metadata, '$."${handleKey(key)}"') IS NOT NULL`, needsValue: false }), // Logical Operators $and: (key) => ({ sql: `(${key})`, needsValue: false }), $or: (key) => ({ sql: `(${key})`, needsValue: false }), $not: (key) => ({ sql: `NOT (${key})`, needsValue: false }), $nor: (key) => ({ sql: `NOT (${key})`, needsValue: false }), $size: (key, paramIndex) => ({ sql: `( CASE WHEN json_type(json_extract(metadata, '$."${handleKey(key)}"')) = 'array' THEN json_array_length(json_extract(metadata, '$."${handleKey(key)}"')) = $${paramIndex} ELSE FALSE END )`, needsValue: true }), // /** // * Regex Operators // * Supports case insensitive and multiline // */ // $regex: (key: string): FilterOperator => ({ // sql: `json_extract(metadata, '$."${handleKey(key)}"') = ?`, // needsValue: true, // transformValue: (value: any) => { // const pattern = typeof value === 'object' ? value.$regex : value; // const options = typeof value === 'object' ? value.$options || '' : ''; // let sql = `json_extract(metadata, '$."${handleKey(key)}"')`; // // Handle multiline // // if (options.includes('m')) { // // sql = `REPLACE(${sql}, CHAR(10), '\n')`; // // } // // let finalPattern = pattern; // // if (options) { // // finalPattern = `(\\?${options})${pattern}`; // // } // // // Handle case insensitivity // // if (options.includes('i')) { // // sql = `LOWER(${sql}) REGEXP LOWER(?)`; // // } else { // // sql = `${sql} REGEXP ?`; // // } // if (options.includes('m')) { // sql = `EXISTS ( // SELECT 1 // FROM json_each( // json_array( // ${sql}, // REPLACE(${sql}, CHAR(10), CHAR(13)) // ) // ) as lines // WHERE lines.value REGEXP ? // )`; // } else { // sql = `${sql} REGEXP ?`; // } // // Handle case insensitivity // if (options.includes('i')) { // sql = sql.replace('REGEXP ?', 'REGEXP LOWER(?)'); // sql = sql.replace('value REGEXP', 'LOWER(value) REGEXP'); // } // // Handle extended - allows whitespace and comments in pattern // if (options.includes('x')) { // // Remove whitespace and comments from pattern // const cleanPattern = pattern.replace(/\s+|#.*$/gm, ''); // return { // sql, // values: [cleanPattern], // }; // } // return { // sql, // values: [pattern], // }; // }, // }), $contains: (key, value) => { let sql; if (Array.isArray(value)) { sql = `( SELECT ${validateJsonArray(key)} AND EXISTS ( SELECT 1 FROM json_each(json_extract(metadata, '$."${handleKey(key)}"')) as m WHERE m.value IN (SELECT value FROM json_each(?)) ) )`; } else if (typeof value === "string") { sql = `lower(json_extract(metadata, '$."${handleKey(key)}"')) LIKE '%' || lower(?) 
|| '%'`; } else { sql = `json_extract(metadata, '$."${handleKey(key)}"') = ?`; } return { sql, needsValue: true, transformValue: () => { if (Array.isArray(value)) { return [JSON.stringify(value)]; } if (typeof value === "object" && value !== null) { return [JSON.stringify(value)]; } return [value]; } }; } /** * $objectContains: True JSON containment for advanced use (deep sub-object match). * Usage: { field: { $objectContains: { ...subobject } } } */ // $objectContains: (key: string) => ({ // sql: '', // Will be overridden by transformValue // needsValue: true, // transformValue: (value: any) => ({ // sql: `json_type(json_extract(metadata, '$."${handleKey(key)}"')) = 'object' // AND json_patch(json_extract(metadata, '$."${handleKey(key)}"'), ?) = json_extract(metadata, '$."${handleKey(key)}"')`, // values: [JSON.stringify(value)], // }), // }), }; var handleKey = (key) => { return key.replace(/\./g, '"."'); }; function buildFilterQuery(filter) { if (!filter) { return { sql: "", values: [] }; } const values = []; const conditions = Object.entries(filter).map(([key, value]) => { const condition = buildCondition(key, value); values.push(...condition.values); return condition.sql; }).join(" AND "); return { sql: conditions ? `WHERE ${conditions}` : "", values }; } function buildCondition(key, value, parentPath) { if (["$and", "$or", "$not", "$nor"].includes(key)) { return handleLogicalOperator(key, value); } if (!value || typeof value !== "object") { return { sql: `json_extract(metadata, '$."${key.replace(/\./g, '"."')}"') = ?`, values: [value] }; } return handleOperator(key, value); } function handleLogicalOperator(key, value, parentPath) { if (!value || value.length === 0) { switch (key) { case "$and": case "$nor": return { sql: "true", values: [] }; case "$or": return { sql: "false", values: [] }; case "$not": throw new Error("$not operator cannot be empty"); default: return { sql: "true", values: [] }; } } if (key === "$not") { const entries = Object.entries(value); const conditions2 = entries.map(([fieldKey, fieldValue]) => buildCondition(fieldKey, fieldValue)); return { sql: `NOT (${conditions2.map((c) => c.sql).join(" AND ")})`, values: conditions2.flatMap((c) => c.values) }; } const values = []; const joinOperator = key === "$or" || key === "$nor" ? "OR" : "AND"; const conditions = Array.isArray(value) ? value.map((f) => { const entries = Object.entries(f); return entries.map(([k, v]) => buildCondition(k, v)); }) : [buildCondition(key, value)]; const joined = conditions.flat().map((c) => { values.push(...c.values); return c.sql; }).join(` ${joinOperator} `); return { sql: key === "$nor" ? `NOT (${joined})` : `(${joined})`, values }; } function handleOperator(key, value) { if (typeof value === "object" && !Array.isArray(value)) { const entries = Object.entries(value); const results = entries.map( ([operator2, operatorValue2]) => operator2 === "$not" ? 
{ sql: `NOT (${Object.entries(operatorValue2).map(([op, val]) => processOperator(key, op, val).sql).join(" AND ")})`, values: Object.entries(operatorValue2).flatMap( ([op, val]) => processOperator(key, op, val).values ) } : processOperator(key, operator2, operatorValue2) ); return { sql: `(${results.map((r) => r.sql).join(" AND ")})`, values: results.flatMap((r) => r.values) }; } const [[operator, operatorValue] = []] = Object.entries(value); return processOperator(key, operator, operatorValue); } var processOperator = (key, operator, operatorValue) => { if (!operator.startsWith("$") || !FILTER_OPERATORS[operator]) { throw new Error(`Invalid operator: ${operator}`); } const operatorFn = FILTER_OPERATORS[operator]; const operatorResult = operatorFn(key, operatorValue); if (!operatorResult.needsValue) { return { sql: operatorResult.sql, values: [] }; } const transformed = operatorResult.transformValue ? operatorResult.transformValue() : operatorValue; return { sql: operatorResult.sql, values: Array.isArray(transformed) ? transformed : [transformed] }; }; // src/vector/libsql/index.ts var LibSQLVector = class extends MastraVector { turso; constructor({ connectionUrl, authToken, syncUrl, syncInterval }) { super(); this.turso = createClient({ url: this.rewriteDbUrl(connectionUrl), syncUrl, authToken, syncInterval }); if (connectionUrl.includes(`file:`) || connectionUrl.includes(`:memory:`)) { void this.turso.execute({ sql: "PRAGMA journal_mode=WAL;", args: {} }); } } // If we're in the .mastra/output directory, use the dir outside .mastra dir // reason we need to do this is libsql relative file paths are based on cwd, not current file path // since mastra dev sets cwd to .mastra/output this means running an agent directly vs running with mastra dev // will put db files in different locations, leading to an inconsistent experience between the two. // Ex: with `file:ex.db` // 1. `mastra dev`: ${cwd}/.mastra/output/ex.db // 2. `tsx src/index.ts`: ${cwd}/ex.db // so if we're in .mastra/output we need to rewrite the file url to be relative to the project root dir // or the experience will be inconsistent // this means `file:` urls are always relative to project root // TODO: can we make this easier via bundling? https://github.com/mastra-ai/mastra/pull/2783#pullrequestreview-2662444241 rewriteDbUrl(url) { if (url.startsWith("file:")) { const pathPart = url.slice("file:".length); if (isAbsolute(pathPart)) { return url; } const cwd = process.cwd(); if (cwd.includes(".mastra") && (cwd.endsWith(`output`) || cwd.endsWith(`output/`) || cwd.endsWith(`output\\`))) { const baseDir = join(cwd, `..`, `..`); const fullPath = resolve(baseDir, pathPart); this.logger.debug( `Initializing LibSQL db with url ${url} with relative file path from inside .mastra/output directory. Rewriting relative file url to "file:${fullPath}". 
This ensures it's outside the .mastra/output directory.` ); return `file:${fullPath}`; } } return url; } transformFilter(filter) { const translator = new LibSQLFilterTranslator(); return translator.translate(filter); } async query(...args) { const params = this.normalizeArgs("query", args, ["minScore"]); try { const { indexName, queryVector, topK = 10, filter, includeVector = false, minScore = 0 } = params; const vectorStr = `[${queryVector.join(",")}]`; const translatedFilter = this.transformFilter(filter); const { sql: filterQuery, values: filterValues } = buildFilterQuery(translatedFilter); filterValues.push(minScore); const query = ` WITH vector_scores AS ( SELECT vector_id as id, (1-vector_distance_cos(embedding, '${vectorStr}')) as score, metadata ${includeVector ? ", vector_extract(embedding) as embedding" : ""} FROM ${indexName} ${filterQuery} ) SELECT * FROM vector_scores WHERE score > ? ORDER BY score DESC LIMIT ${topK}`; const result = await this.turso.execute({ sql: query, args: filterValues }); return result.rows.map(({ id, score, metadata, embedding }) => ({ id, score, metadata: JSON.parse(metadata ?? "{}"), ...includeVector && embedding && { vector: JSON.parse(embedding) } })); } finally { } } async upsert(...args) { const params = this.normalizeArgs("upsert", args); const { indexName, vectors, metadata, ids } = params; const tx = await this.turso.transaction("write"); try { const vectorIds = ids || vectors.map(() => crypto.randomUUID()); for (let i = 0; i < vectors.length; i++) { const query = ` INSERT INTO ${indexName} (vector_id, embedding, metadata) VALUES (?, vector32(?), ?) ON CONFLICT(vector_id) DO UPDATE SET embedding = vector32(?), metadata = ? `; await tx.execute({ sql: query, // @ts-ignore args: [ vectorIds[i], JSON.stringify(vectors[i]), JSON.stringify(metadata?.[i] || {}), JSON.stringify(vectors[i]), JSON.stringify(metadata?.[i] || {}) ] }); } await tx.commit(); return vectorIds; } catch (error) { await tx.rollback(); if (error instanceof Error && error.message?.includes("dimensions are different")) { const match = error.message.match(/dimensions are different: (\d+) != (\d+)/); if (match) { const [, actual, expected] = match; throw new Error( `Vector dimension mismatch: Index "${indexName}" expects ${expected} dimensions but got ${actual} dimensions. 
Either use a matching embedding model or delete and recreate the index with the new dimension.` ); } } throw error; } } async createIndex(...args) { const params = this.normalizeArgs("createIndex", args); const { indexName, dimension } = params; try { if (!indexName.match(/^[a-zA-Z_][a-zA-Z0-9_]*$/)) { throw new Error("Invalid index name format"); } if (!Number.isInteger(dimension) || dimension <= 0) { throw new Error("Dimension must be a positive integer"); } await this.turso.execute({ sql: ` CREATE TABLE IF NOT EXISTS ${indexName} ( id SERIAL PRIMARY KEY, vector_id TEXT UNIQUE NOT NULL, embedding F32_BLOB(${dimension}), metadata TEXT DEFAULT '{}' ); `, args: [] }); await this.turso.execute({ sql: ` CREATE INDEX IF NOT EXISTS ${indexName}_vector_idx ON ${indexName} (libsql_vector_idx(embedding)) `, args: [] }); } catch (error) { console.error("Failed to create vector table:", error); throw error; } finally { } } async deleteIndex(indexName) { try { await this.turso.execute({ sql: `DROP TABLE IF EXISTS ${indexName}`, args: [] }); } catch (error) { console.error("Failed to delete vector table:", error); throw new Error(`Failed to delete vector table: ${error.message}`); } finally { } } async listIndexes() { try { const vectorTablesQuery = ` SELECT name FROM sqlite_master WHERE type='table' AND sql LIKE '%F32_BLOB%'; `; const result = await this.turso.execute({ sql: vectorTablesQuery, args: [] }); return result.rows.map((row) => row.name); } catch (error) { throw new Error(`Failed to list vector tables: ${error.message}`); } } async describeIndex(indexName) { try { const tableInfoQuery = ` SELECT sql FROM sqlite_master WHERE type='table' AND name = ?; `; const tableInfo = await this.turso.execute({ sql: tableInfoQuery, args: [indexName] }); if (!tableInfo.rows[0]?.sql) { throw new Error(`Table ${indexName} not found`); } const dimension = parseInt(tableInfo.rows[0].sql.match(/F32_BLOB\((\d+)\)/)?.[1] || "0"); const countQuery = ` SELECT COUNT(*) as count FROM ${indexName}; `; const countResult = await this.turso.execute({ sql: countQuery, args: [] }); const metric = "cosine"; return { dimension, count: countResult?.rows?.[0]?.count ?? 0, metric }; } catch (e) { throw new Error(`Failed to describe vector table: ${e.message}`); } } /** * Updates an index entry by its ID with the provided vector and/or metadata. * * @param indexName - The name of the index to update. * @param id - The ID of the index entry to update. * @param update - An object containing the vector and/or metadata to update. * @param update.vector - An optional array of numbers representing the new vector. * @param update.metadata - An optional record containing the new metadata. * @returns A promise that resolves when the update is complete. * @throws Will throw an error if no updates are provided or if the update operation fails. 
*/ async updateIndexById(indexName, id, update) { try { const updates = []; const args = []; if (update.vector) { updates.push("embedding = vector32(?)"); args.push(JSON.stringify(update.vector)); } if (update.metadata) { updates.push("metadata = ?"); args.push(JSON.stringify(update.metadata)); } if (updates.length === 0) { throw new Error("No updates provided"); } args.push(id); const query = ` UPDATE ${indexName} SET ${updates.join(", ")} WHERE vector_id = ?; `; await this.turso.execute({ sql: query, args }); } catch (error) { throw new Error(`Failed to update index by id: ${id} for index: ${indexName}: ${error.message}`); } } async deleteIndexById(indexName, id) { try { await this.turso.execute({ sql: `DELETE FROM ${indexName} WHERE vector_id = ?`, args: [id] }); } catch (error) { throw new Error(`Failed to delete index by id: ${id} for index: ${indexName}: ${error.message}`); } } async truncateIndex(indexName) { await this.turso.execute({ sql: `DELETE FROM ${indexName}`, args: [] }); } }; export { LibSQLVector };
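
For readers landing on this file from the package page, here is a minimal usage sketch of the exported LibSQLVector class, based only on the method bodies above. The import path and the object-argument call style are assumptions: only the class export is visible in this chunk, and the argument shape depends on how normalizeArgs (inherited from MastraVector) resolves keyed versus positional parameters, so treat this as illustrative rather than authoritative.

import { LibSQLVector } from '@mastra/core/vector/libsql'; // assumed import path

// Hypothetical placeholder embedding whose length matches the index dimension.
const embedding = Array.from({ length: 1536 }, () => Math.random());

const store = new LibSQLVector({ connectionUrl: 'file:vector.db' });

// createIndex validates the table name and dimension, then creates a table with an
// F32_BLOB(dimension) embedding column plus a libsql_vector_idx index.
await store.createIndex({ indexName: 'documents', dimension: 1536 });

// upsert runs in a write transaction keyed by vector_id; ids are generated with
// crypto.randomUUID() when omitted, and a dimension mismatch surfaces as a descriptive error.
const ids = await store.upsert({
  indexName: 'documents',
  vectors: [embedding],
  metadata: [{ category: 'news', tags: ['ai'] }],
});

// query scores rows with 1 - vector_distance_cos, applies the translated metadata filter,
// drops rows at or below minScore, and returns the topK matches.
const results = await store.query({
  indexName: 'documents',
  queryVector: embedding,
  topK: 5,
  filter: { category: 'news' },
  minScore: 0.5,
});
// results -> [{ id, score, metadata, vector? }, ...]

Note that 'file:' connection URLs with relative paths are rewritten to the project root when the process is running from .mastra/output (see rewriteDbUrl above), so the same database file is used whether the code runs under mastra dev or directly.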
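
To make the filter plumbing easier to follow, the sketch below traces a hypothetical metadata filter through the two internal helpers. LibSQLFilterTranslator.translate normalizes shorthand forms (bare primitives become $eq, bare arrays become $in, nested objects flatten into dotted paths), and buildFilterQuery compiles the normalized tree into a parameterized WHERE clause over json_extract(metadata, ...). Neither helper is exported from this chunk, and the SQL shown in the comments is abridged.

const translator = new LibSQLFilterTranslator();
const translated = translator.translate({
  category: 'news',               // primitive      -> { $eq: 'news' }
  tags: ['ai', 'vector'],         // bare array     -> { $in: ['ai', 'vector'] }
  user: { profile: { age: 25 } }, // nested object  -> { 'user.profile.age': { $eq: 25 } }
  price: { $gt: 10 },             // operators pass through unchanged
});

const { sql, values } = buildFilterQuery(translated);
// sql (abridged) ->
//   WHERE (CASE WHEN ? IS NULL ... json_extract(metadata, '$."category"') = ? END)
//     AND (... json_each(json_extract(metadata, '$."tags"')) ... IN (SELECT value FROM json_each(?)) ...)
//     AND (CASE WHEN ? IS NULL ... json_extract(metadata, '$."user"."profile"."age"') = ? END)
//     AND (CAST(json_extract(metadata, '$."price"') AS NUMERIC) > ?)
// values -> ['news', 'news', '["ai","vector"]', 'ai', 'vector', 25, 25, 10]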
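
The translator also advertises two LibSQL-specific custom operators, $contains and $size, alongside the standard set (including $elemMatch). Reading the SQL builders above: $contains on a string-valued metadata field does a case-insensitive LIKE substring match, $contains with an array checks whether any of the given values appears in the stored JSON array, $size compares json_array_length, and $elemMatch rewrites its sub-conditions to run against each array element via json_each. A hypothetical filter exercising them:

const filter = {
  title:   { $contains: 'mastra' },                 // lower(json_extract(...)) LIKE '%' || lower(?) || '%'
  tags:    { $contains: ['ai'] },                   // true if any given value is in the stored array
  scores:  { $size: 3 },                            // json_array_length(...) equals 3
  reviews: { $elemMatch: { rating: { $gte: 4 } } }, // at least one element with rating >= 4
};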