knowledgegraph-mcp

MCP server for enabling persistent knowledge storage for Claude through a knowledge graph with multiple storage backends

postgresql-strategy.js (392 lines, 16.3 kB)
import Fuse from 'fuse.js';
import { BaseSearchStrategy } from './base-strategy.js';
import { getValidatedSearchLimits } from '../config.js';

/**
 * PostgreSQL search strategy - supports both database-level and client-side fuzzy search
 */
export class PostgreSQLFuzzyStrategy extends BaseSearchStrategy {
    pgPool;
    project;
    searchLimits = getValidatedSearchLimits();

    constructor(config, pgPool, project) {
        super(config);
        this.pgPool = pgPool;
        this.project = project;
    }

    canUseDatabase() {
        return this.config.useDatabaseSearch;
    }

    async searchDatabase(query, threshold, project) {
        // Handle multiple queries with optimized SQL
        if (Array.isArray(query)) {
            return this.searchMultipleDatabaseOptimized(query, threshold, project);
        }
        // Single query - use existing logic
        return this.searchSingleDatabase(query, threshold, project);
    }

    async searchSingleDatabase(query, threshold, project) {
        const client = await this.pgPool.connect();
        try {
            // Use provided project parameter or fall back to constructor project
            const searchProject = project || this.project;
            const result = await client.query(`
                SELECT e.*,
                       GREATEST(
                           similarity(e.name, $1),
                           similarity(e.entity_type, $1),
                           similarity(e.observations::text, $1),
                           similarity(e.tags::text, $1)
                       ) as relevance_score
                FROM entities e
                WHERE e.project = $3
                  AND (similarity(e.name, $1) > $2
                       OR similarity(e.entity_type, $1) > $2
                       OR similarity(e.observations::text, $1) > $2
                       OR similarity(e.tags::text, $1) > $2)
                ORDER BY relevance_score DESC
                LIMIT $4
            `, [query, threshold, searchProject, this.searchLimits.maxResults]);
            return result.rows.map(row => ({
                name: row.name,
                entityType: row.entity_type,
                observations: row.observations || [],
                tags: row.tags || []
            }));
        }
        finally {
            client.release();
        }
    }

    searchClientSide(entities, query) {
        // Handle multiple queries
        if (Array.isArray(query)) {
            return this.searchMultipleClientSide(entities, query);
        }
        // Single query - use existing logic
        return this.searchSingleClientSide(entities, query);
    }

    searchSingleClientSide(entities, query) {
        // Use chunking for large entity sets to improve performance
        if (entities.length > this.searchLimits.clientSideChunkSize) {
            console.log(`PostgreSQL: Using chunked search for ${entities.length} entities (chunk size: ${this.searchLimits.clientSideChunkSize})`);
            return this.searchClientSideChunked(entities, query, this.searchLimits.clientSideChunkSize);
        }
        const fuseOptions = {
            threshold: this.config.threshold,
            distance: 100,
            includeScore: true,
            keys: ['name', 'entityType', 'observations', 'tags'],
            ...this.config.fuseOptions
        };
        const fuse = new Fuse(entities, fuseOptions);
        const results = fuse.search(query);
        return results.map(result => result.item);
    }

    /**
     * Get all entities for a project from PostgreSQL database
     * This is used to load entities for client-side search
     * Respects maxClientSideEntities limit to prevent memory issues
     */
    async getAllEntities(project) {
        const client = await this.pgPool.connect();
        try {
            const searchProject = project || this.project;
            const result = await client.query(`
                SELECT name, entity_type, observations, tags
                FROM entities
                WHERE project = $1
                ORDER BY updated_at DESC, name
                LIMIT $2
            `, [searchProject, this.searchLimits.maxClientSideEntities]);
            // Log warning if we hit the limit
            if (result.rows.length === this.searchLimits.maxClientSideEntities) {
                console.warn(`PostgreSQL getAllEntities: Hit maxClientSideEntities limit of ${this.searchLimits.maxClientSideEntities}. Consider increasing KNOWLEDGEGRAPH_SEARCH_MAX_CLIENT_ENTITIES or using database-level search.`);
            }
            return result.rows.map(row => ({
                name: row.name,
                entityType: row.entity_type,
                observations: row.observations || [],
                tags: row.tags || []
            }));
        }
        catch (error) {
            console.error('Failed to load entities from PostgreSQL:', error);
            throw error;
        }
        finally {
            client.release();
        }
    }

    /**
     * Optimized multiple query search using single SQL query with OR conditions
     * This replaces the inefficient sequential query processing from base strategy
     */
    async searchMultipleDatabaseOptimized(queries, threshold, project) {
        // Handle empty queries array
        if (queries.length === 0) {
            return [];
        }
        // For very large query arrays, use batching to avoid excessive SQL complexity
        if (queries.length > this.searchLimits.batchSize) {
            return this.searchMultipleDatabaseBatched(queries, threshold, project, this.searchLimits.batchSize);
        }
        const client = await this.pgPool.connect();
        try {
            const searchProject = project || this.project;
            // Build parameterized query with OR conditions for each search term
            const conditions = queries.map((_, index) => `(similarity(e.name, $${index + 1}) > $${queries.length + 1} OR similarity(e.entity_type, $${index + 1}) > $${queries.length + 1} OR similarity(e.observations::text, $${index + 1}) > $${queries.length + 1} OR similarity(e.tags::text, $${index + 1}) > $${queries.length + 1})`).join(' OR ');
            // Build relevance score calculation for all queries
            const relevanceCalculations = queries.map((_, index) => `similarity(e.name, $${index + 1}), similarity(e.entity_type, $${index + 1}), similarity(e.observations::text, $${index + 1}), similarity(e.tags::text, $${index + 1})`).join(', ');
            const result = await client.query(`
                SELECT DISTINCT e.*,
                       GREATEST(${relevanceCalculations}) as relevance_score
                FROM entities e
                WHERE e.project = $${queries.length + 2}
                  AND (${conditions})
                ORDER BY relevance_score DESC
                LIMIT $${queries.length + 3}
            `, [...queries, threshold, searchProject, this.searchLimits.maxResults]);
            return result.rows.map(row => ({
                name: row.name,
                entityType: row.entity_type,
                observations: row.observations || [],
                tags: row.tags || []
            }));
        }
        finally {
            client.release();
        }
    }

    /**
     * Handle very large query arrays by processing them in batches
     */
    async searchMultipleDatabaseBatched(queries, threshold, project, batchSize = 10) {
        let allResults = [];
        const existingEntityNames = new Set();
        // Process queries in batches
        for (let i = 0; i < queries.length; i += batchSize) {
            const batchQueries = queries.slice(i, i + batchSize);
            // Use optimized SQL for this batch
            const batchResults = await this.searchMultipleDatabaseOptimized(batchQueries, threshold, project);
            // Deduplicate results
            const newEntities = batchResults.filter(e => !existingEntityNames.has(e.name));
            allResults.push(...newEntities);
            newEntities.forEach(e => existingEntityNames.add(e.name));
        }
        return allResults;
    }

    /**
     * Get all entities with pagination support
     */
    async getAllEntitiesPaginated(pagination, project) {
        const client = await this.pgPool.connect();
        try {
            const searchProject = project || this.project;
            const page = pagination.page || 0;
            const pageSize = pagination.pageSize || 100;
            const offset = page * pageSize;
            // Get total count
            const countResult = await client.query(`
                SELECT COUNT(*) as total_count
                FROM entities
                WHERE project = $1
            `, [searchProject]);
            const totalCount = parseInt(countResult.rows[0].total_count);
            // Get paginated data
            const dataResult = await client.query(`
                SELECT name, entity_type, observations, tags
                FROM entities
                WHERE project = $1
                ORDER BY updated_at DESC, name
                LIMIT $2 OFFSET $3
            `, [searchProject, pageSize, offset]);
            const entities = dataResult.rows.map(row => ({
                name: row.name,
                entityType: row.entity_type,
                observations: row.observations || [],
                tags: row.tags || []
            }));
            const totalPages = Math.ceil(totalCount / pageSize);
            return {
                data: entities,
                pagination: {
                    currentPage: page,
                    pageSize: pageSize,
                    totalCount: totalCount,
                    totalPages: totalPages,
                    hasNextPage: page < totalPages - 1,
                    hasPreviousPage: page > 0
                }
            };
        }
        catch (error) {
            console.error('Failed to load paginated entities from PostgreSQL:', error);
            throw error;
        }
        finally {
            client.release();
        }
    }

    /**
     * Database-level fuzzy search with pagination support
     */
    async searchDatabasePaginated(query, threshold, pagination, project) {
        // Handle multiple queries
        if (Array.isArray(query)) {
            return this.searchMultipleDatabasePaginated(query, threshold, pagination, project);
        }
        // Single query
        return this.searchSingleDatabasePaginated(query, threshold, pagination, project);
    }

    /**
     * Single database fuzzy search with pagination
     */
    async searchSingleDatabasePaginated(query, threshold, pagination, project) {
        const client = await this.pgPool.connect();
        try {
            const searchProject = project || this.project;
            const page = pagination.page || 0;
            const pageSize = pagination.pageSize || 100;
            const offset = page * pageSize;
            // Get total count
            const countResult = await client.query(`
                SELECT COUNT(*) as total_count
                FROM entities e
                WHERE e.project = $3
                  AND (similarity(e.name, $1) > $2
                       OR similarity(e.entity_type, $1) > $2
                       OR similarity(e.observations::text, $1) > $2
                       OR similarity(e.tags::text, $1) > $2)
            `, [query, threshold, searchProject]);
            const totalCount = parseInt(countResult.rows[0].total_count);
            // Get paginated data
            const dataResult = await client.query(`
                SELECT e.*,
                       GREATEST(
                           similarity(e.name, $1),
                           similarity(e.entity_type, $1),
                           similarity(e.observations::text, $1),
                           similarity(e.tags::text, $1)
                       ) as relevance_score
                FROM entities e
                WHERE e.project = $3
                  AND (similarity(e.name, $1) > $2
                       OR similarity(e.entity_type, $1) > $2
                       OR similarity(e.observations::text, $1) > $2
                       OR similarity(e.tags::text, $1) > $2)
                ORDER BY relevance_score DESC
                LIMIT $4 OFFSET $5
            `, [query, threshold, searchProject, pageSize, offset]);
            const entities = dataResult.rows.map(row => ({
                name: row.name,
                entityType: row.entity_type,
                observations: row.observations || [],
                tags: row.tags || []
            }));
            const totalPages = Math.ceil(totalCount / pageSize);
            return {
                data: entities,
                pagination: {
                    currentPage: page,
                    pageSize: pageSize,
                    totalCount: totalCount,
                    totalPages: totalPages,
                    hasNextPage: page < totalPages - 1,
                    hasPreviousPage: page > 0
                }
            };
        }
        catch (error) {
            console.error('Failed to perform paginated database search in PostgreSQL:', error);
            throw error;
        }
        finally {
            client.release();
        }
    }

    /**
     * Multiple database fuzzy search with pagination
     */
    async searchMultipleDatabasePaginated(queries, threshold, pagination, project) {
        if (queries.length === 0) {
            return {
                data: [],
                pagination: {
                    currentPage: pagination.page || 0,
                    pageSize: pagination.pageSize || 100,
                    totalCount: 0,
                    totalPages: 0,
                    hasNextPage: false,
                    hasPreviousPage: false
                }
            };
        }
        const client = await this.pgPool.connect();
        try {
            const searchProject = project || this.project;
            const page = pagination.page || 0;
            const pageSize = pagination.pageSize || 100;
            const offset = page * pageSize;
            // Build parameterized query conditions
            const conditions = queries.map((_, index) => `(similarity(e.name, $${index + 1}) > $${queries.length + 1} OR similarity(e.entity_type, $${index + 1}) > $${queries.length + 1} OR similarity(e.observations::text, $${index + 1}) > $${queries.length + 1} OR similarity(e.tags::text, $${index + 1}) > $${queries.length + 1})`).join(' OR ');
            // Get total count
            const countResult = await client.query(`
                SELECT COUNT(DISTINCT e.name) as total_count
                FROM entities e
                WHERE e.project = $${queries.length + 2}
                  AND (${conditions})
            `, [...queries, threshold, searchProject]);
            const totalCount = parseInt(countResult.rows[0].total_count);
            // Build relevance score calculation for all queries
            const relevanceCalculations = queries.map((_, index) => `similarity(e.name, $${index + 1}), similarity(e.entity_type, $${index + 1}), similarity(e.observations::text, $${index + 1}), similarity(e.tags::text, $${index + 1})`).join(', ');
            // Get paginated data
            const dataResult = await client.query(`
                SELECT DISTINCT e.*,
                       GREATEST(${relevanceCalculations}) as relevance_score
                FROM entities e
                WHERE e.project = $${queries.length + 2}
                  AND (${conditions})
                ORDER BY relevance_score DESC
                LIMIT $${queries.length + 3} OFFSET $${queries.length + 4}
            `, [...queries, threshold, searchProject, pageSize, offset]);
            const entities = dataResult.rows.map(row => ({
                name: row.name,
                entityType: row.entity_type,
                observations: row.observations || [],
                tags: row.tags || []
            }));
            const totalPages = Math.ceil(totalCount / pageSize);
            return {
                data: entities,
                pagination: {
                    currentPage: page,
                    pageSize: pageSize,
                    totalCount: totalCount,
                    totalPages: totalPages,
                    hasNextPage: page < totalPages - 1,
                    hasPreviousPage: page > 0
                }
            };
        }
        catch (error) {
            console.error('Failed to perform paginated multiple database search in PostgreSQL:', error);
            throw error;
        }
        finally {
            client.release();
        }
    }
}
//# sourceMappingURL=postgresql-strategy.js.map
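The sketch below shows one way the strategy above might be wired up directly against a pg connection pool. It is an assumption-laden example, not the package's documented API: the import path, project name, and the exact shape of the config object (useDatabaseSearch, threshold, fuseOptions) are inferred from how this file reads this.config, and the database-level path additionally requires the PostgreSQL pg_trgm extension, which provides the similarity() function used in the SQL.

// Usage sketch (assumptions noted inline); run as an ES module under Node.js.
import pg from 'pg';
// Hypothetical relative path; the real location inside the package may differ.
import { PostgreSQLFuzzyStrategy } from './postgresql-strategy.js';

const pool = new pg.Pool({ connectionString: process.env.DATABASE_URL });

// Config fields inferred from this file's reads of this.config; 'my-project' is a placeholder.
const strategy = new PostgreSQLFuzzyStrategy(
    { useDatabaseSearch: true, threshold: 0.3 },
    pool,
    'my-project'
);

// Database-level fuzzy search; an array of queries triggers the optimized multi-query SQL path.
// Requires CREATE EXTENSION pg_trgm on the target database for similarity() to exist.
const matches = await strategy.searchDatabase(['knowledge', 'graph'], 0.3, 'my-project');
console.log(matches.map(entity => entity.name));

await pool.end();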