lynkr
Version:
Self-hosted LLM gateway and tier-routing proxy for Claude Code, Cursor, and Codex. Routes across Ollama, AWS Bedrock, OpenRouter, Databricks, Azure OpenAI, llama.cpp, and LM Studio with prompt caching, MCP tools, and 60-80% cost savings.
377 lines (315 loc) • 11.2 kB
JavaScript
const db = require("../db");
const logger = require("../logger");
const store = require("./store");
// ============================================================================
// CONFIGURATION
// ============================================================================
const MAX_QUERY_LENGTH = 1000;
const MAX_OR_TERMS = 50;
// ============================================================================
// KEYWORD SANITIZATION (NEW - Critical for FTS5 Safety)
// ============================================================================
/**
* Sanitize a single keyword for use in FTS5 queries
* Removes all FTS5 special characters that can cause syntax errors
*
* CRITICAL: FTS5 has many special characters that cause errors:
* - Dash (-) is interpreted as column filter
* - @ is invalid bareword character
* - Parentheses, brackets, quotes can break query syntax
* - Commas and periods can cause issues in newer SQLite versions
*/
function sanitizeKeyword(keyword) {
if (!keyword || typeof keyword !== 'string') {
return '';
}
// Remove ALL FTS5 special characters
const sanitized = keyword
.replace(/[*()<>\-:\[\]{}|^~,.;!?'"@#$%&+=/\\\\]/g, ' ')
.replace(/\s+/g, ' ')
.trim();
return sanitized;
}
/**
* Sanitize array of keywords
* Returns only keywords that are 3+ characters after sanitization
*/
function sanitizeKeywords(keywords) {
if (!Array.isArray(keywords)) {
return [];
}
return keywords
.map(sanitizeKeyword)
.filter(k => k.length >= 3);
}
// ============================================================================
// FTS5 QUERY PREPARATION (UPDATED - Hardened)
// ============================================================================
/**
* Prepare FTS5 query - handle special characters and phrases
*
* CRITICAL FIX for better-sqlite3 v12+ (SQLite 3.46+):
* - Commas and periods inside quoted strings cause "fts5: syntax error near ,"
* - Solution: Extract keywords and search for them individually with OR
* - This is more robust than attempting to quote complex phrases
*/
function prepareFTS5Query(query) {
let cleaned = query.trim();
// Length validation
if (cleaned.length > MAX_QUERY_LENGTH) {
logger.warn({
queryLength: cleaned.length,
truncatedTo: MAX_QUERY_LENGTH
}, 'Query truncated due to excessive length');
cleaned = cleaned.substring(0, MAX_QUERY_LENGTH);
}
if (!cleaned) {
return '"empty query"'; // Safe fallback
}
// Step 1: Remove XML/HTML tags (common in error messages and code)
cleaned = cleaned.replace(/<[^>]+>/g, ' ');
cleaned = cleaned.replace(/\s+/g, ' ').trim();
if (!cleaned) {
return '"empty query"';
}
// Step 2: Check for FTS5 operators (AND, OR, NOT)
// If present, user is doing advanced search - preserve operators
const hasFTS5Operators = /\b(AND|OR|NOT)\b/.test(cleaned);
// Step 3: Remove ALL FTS5 special characters and punctuation
// CRITICAL: These can cause syntax errors even in quoted strings:
// - Commas (,) and periods (.) → "syntax error near ,"
// - Dashes (-) → interpreted as column filter
// - @ symbol → "syntax error near @"
// - Quotes (") → can break string quoting
// - Parentheses, brackets → break grouping syntax
cleaned = cleaned.replace(/[*()<>\-:\[\]{}|^~,.;!?'"@#$%&+=/\\\\]/g, ' ');
cleaned = cleaned.replace(/\s+/g, ' ').trim();
if (!cleaned) {
return '"empty query"';
}
// Step 4: If has operators, return as-is for advanced search
if (hasFTS5Operators) {
// Advanced users can use AND/OR/NOT
// Characters are already sanitized above
return cleaned;
}
// Step 5: Extract keywords (min 3 chars, max 50 words to prevent DoS)
const words = cleaned
.split(/\s+/)
.filter(w => w.length >= 3)
.slice(0, MAX_OR_TERMS);
if (words.length === 0) {
// No valid keywords - try with shorter words
const anyWords = cleaned
.split(/\s+/)
.filter(w => w.length > 0)
.slice(0, 10);
if (anyWords.length === 0) {
return '"empty query"';
}
// Quote each word individually
return anyWords.map(w => `"${w}"`).join(' OR ');
}
// Step 6: Single word - simple quote
if (words.length === 1) {
return `"${words[0]}"`;
}
// Step 7: Multiple words - create OR query of individual quoted words
// This is the safest approach that avoids all FTS5 syntax errors
// Example: "word1" OR "word2" OR "word3"
return words.map(word => `"${word}"`).join(' OR ');
}
// ============================================================================
// SEARCH FUNCTIONS (UPDATED)
// ============================================================================
/**
* Search memories using FTS5 full-text search
*/
function searchMemories(options) {
const {
query,
limit = 10,
types = null,
categories = null,
sessionId = null,
minImportance = null,
} = options;
if (!query || typeof query !== "string") {
logger.warn("Search query must be a non-empty string");
return [];
}
try {
// Build FTS5 query - now hardened against syntax errors
const ftsQuery = prepareFTS5Query(query);
logger.debug({
originalQuery: query.substring(0, 100),
ftsQuery: ftsQuery.substring(0, 100)
}, 'FTS5 query prepared');
// Build SQL with filters
let sql = `
SELECT m.id, m.session_id, m.content, m.type, m.category,
m.importance, m.surprise_score, m.access_count, m.decay_factor,
m.source_turn_id, m.created_at, m.updated_at, m.last_accessed_at, m.metadata,
memories_fts.rank
FROM memories_fts
JOIN memories m ON m.id = memories_fts.rowid
WHERE memories_fts MATCH ?
`;
const params = [ftsQuery];
// Add filters
if (sessionId) {
sql += ` AND (m.session_id = ? OR m.session_id IS NULL)`;
params.push(sessionId);
}
if (types && Array.isArray(types) && types.length > 0) {
const placeholders = types.map(() => "?").join(",");
sql += ` AND m.type IN (${placeholders})`;
params.push(...types);
}
if (categories && Array.isArray(categories) && categories.length > 0) {
const placeholders = categories.map(() => "?").join(",");
sql += ` AND m.category IN (${placeholders})`;
params.push(...categories);
}
if (minImportance !== null && typeof minImportance === "number") {
sql += ` AND m.importance >= ?`;
params.push(minImportance);
}
// Order by FTS5 rank and importance
sql += ` ORDER BY memories_fts.rank, m.importance DESC LIMIT ?`;
params.push(limit);
const startTime = Date.now();
const stmt = db.prepare(sql);
const rows = stmt.all(...params);
const duration = Date.now() - startTime;
// Log slow queries for monitoring
if (duration > 100) {
logger.warn({
query: query.substring(0, 50),
ftsQuery: ftsQuery.substring(0, 50),
duration,
resultCount: rows.length
}, 'Slow FTS5 query detected');
}
return rows.map((row) => ({
id: row.id,
sessionId: row.session_id ?? null,
content: row.content,
type: row.type,
category: row.category ?? null,
importance: row.importance ?? 0.5,
surpriseScore: row.surprise_score ?? 0.0,
accessCount: row.access_count ?? 0,
decayFactor: row.decay_factor ?? 1.0,
sourceTurnId: row.source_turn_id ?? null,
createdAt: row.created_at,
updatedAt: row.updated_at,
lastAccessedAt: row.last_accessed_at ?? null,
metadata: row.metadata ? JSON.parse(row.metadata) : {},
rank: row.rank,
}));
} catch (err) {
logger.error({
err,
query: query.substring(0, 100),
sqliteCode: err.code,
message: err.message
}, "FTS5 search failed");
return [];
}
}
/**
* Search with keyword expansion (extract key terms)
*/
function searchWithExpansion(options) {
const { query, limit = 10 } = options;
// Extract keywords from query
const keywords = extractKeywords(query);
const sanitizedKeywords = sanitizeKeywords(keywords); // ✅ ADDED
// Search with original query (already sanitized by prepareFTS5Query)
const results = searchMemories({ ...options, limit: limit * 2 });
// If not enough results, try individual keywords
if (results.length < limit && sanitizedKeywords.length > 1) {
const seen = new Set(results.map((r) => r.id));
for (const keyword of sanitizedKeywords) { // ✅ CHANGED - use sanitized
if (results.length >= limit) break;
const kwResults = searchMemories({
...options,
query: keyword, // Now guaranteed safe
limit: limit - results.length,
});
for (const result of kwResults) {
if (!seen.has(result.id)) {
results.push(result);
seen.add(result.id);
}
}
}
}
return results.slice(0, limit);
}
/**
* Extract keywords from text (simple tokenization)
*/
function extractKeywords(text) {
if (!text) return [];
const stopwords = new Set([
"the", "is", "at", "which", "on", "and", "or", "not", "this", "that",
"with", "from", "for", "to", "in", "of", "a", "an",
]);
return text
.toLowerCase()
.split(/\s+/)
.map((word) => word.replace(/[^\w]/g, ""))
.filter((word) => word.length > 3 && !stopwords.has(word));
}
/**
* Find similar memories by keyword overlap (UPDATED - sanitized)
*/
function findSimilar(memoryId, limit = 5) {
const memory = store.getMemory(memoryId);
if (!memory) {
throw new Error(`Memory with id ${memoryId} not found`);
}
const keywords = extractKeywords(memory.content);
const sanitizedKeywords = sanitizeKeywords(keywords); // ✅ ADDED
if (sanitizedKeywords.length === 0) return [];
// Build OR query with SANITIZED keywords
const query = sanitizedKeywords.join(" OR "); // ✅ CHANGED - use sanitized
const results = searchMemories({
query,
limit: limit + 1,
});
return results.filter((r) => r.id !== memoryId).slice(0, limit);
}
/**
* Search by content similarity (UPDATED - sanitized)
*/
function searchByContent(content, options = {}) {
const keywords = extractKeywords(content);
const sanitizedKeywords = sanitizeKeywords(keywords); // ✅ ADDED
if (sanitizedKeywords.length === 0) return [];
const query = sanitizedKeywords.slice(0, 5).join(" OR "); // ✅ CHANGED
return searchMemories({ ...options, query });
}
/**
* Count search results without fetching them
*/
function countSearchResults(options) {
const results = searchMemories({ ...options, limit: 1000 });
return results.length;
}
// ============================================================================
// EXPORTS
// ============================================================================
module.exports = {
searchMemories,
searchWithExpansion,
extractKeywords,
findSimilar,
searchByContent,
countSearchResults,
prepareFTS5Query,
sanitizeKeyword, // ✅ NEW - exported for testing
sanitizeKeywords, // ✅ NEW - exported for testing
};