UNPKG

@simonecoelhosfo/optimizely-mcp-server

Version:

Optimizely MCP Server for AI assistants with integrated CLI tools

395 lines 17.1 kB
/**
 * JOIN Cardinality Guard - Prevent Query Explosion
 *
 * CRITICAL COMPONENT: Prevents Cartesian product explosions by:
 * 1. Estimating JOIN multiplication factors
 * 2. Enforcing cardinality limits based on query type
 * 3. Automatically injecting DISTINCT when necessary
 * 4. Providing fallback strategies for dangerous JOINs
 *
 * Created: July 5, 2025
 * Purpose: Second line of defense against 700-1400x count inflation
 */
import { getLogger } from '../../logging/Logger.js';
const logger = getLogger();
export class CardinalityGuard {
    config;
    tableStats = new Map();
    joinHistograms = new Map(); // Track historical multipliers
    /**
     * @param {object} [config] - Optional overrides for the defaults below.
     *   `config.limits` is merged key-by-key with the default limits, so a
     *   partial override (e.g. `{ limits: { detail: 20 } }`) keeps the
     *   remaining defaults instead of leaving them undefined.
     */
    constructor(config = {}) {
        this.config = {
            enabled: true,
            autoDistinct: true,
            fallbackEnabled: true,
            debugMode: true,
            ...config,
            limits: {
                simpleCount: 1, // COUNT(*) should never multiply
                groupByCount: 2, // GROUP BY COUNT can multiply by 2x max
                crossEntity: 5, // Cross-entity queries up to 5x
                detail: 10, // Detail queries up to 10x
                maxExecutionTime: 5000, // 5 second timeout
                ...config?.limits
            }
        };
        this.initializeTableStats();
        logger.info(`CardinalityGuard initialized with limits: ${JSON.stringify(this.config.limits)}`);
    }
    /**
     * Validate if a set of JOINs is safe based on cardinality estimation.
     *
     * @param {Array<{fromTable: string, toTable: string}>} joins - JOINs to evaluate.
     * @param {string} queryType - 'count', 'groupBy', 'crossEntity' or 'detail';
     *   any other value is checked against the groupBy limit.
     * @param {object} [queryContext] - Optional; `groupByFields` and
     *   `whereConditions` are read from it when present.
     * @returns {{isValid: boolean, totalMultiplier: number, estimates: object[],
     *   recommendations: string[], fallbackSQL: (string|undefined)}}
     */
    validateJoinCardinality(joins, queryType, queryContext) {
        if (!this.config.enabled) {
            return {
                isValid: true,
                totalMultiplier: 1,
                estimates: [],
                recommendations: []
            };
        }
        const estimates = joins.map((join) => this.estimateJoinCardinality(join));
        const totalMultiplier = this.calculateTotalMultiplier(estimates);
        const limit = this.getLimitForQueryType(queryType);
        const groupByFields = queryContext?.groupByFields;
        // L2-1 FIX: a GROUP BY the user explicitly asked for is allowed even
        // when it exceeds the normal limit.
        const isUserRequestedGroupBy = queryType === 'groupBy' && Boolean(groupByFields) && groupByFields.length > 0;
        const isValid = isUserRequestedGroupBy || totalMultiplier <= limit;
        const recommendations = [];
        if (!isValid) {
            recommendations.push(`Total multiplier ${totalMultiplier}x exceeds limit ${limit}x for ${queryType} queries`);
            // Add specific recommendations for the JOINs that contribute most
            for (const estimate of estimates) {
                if (estimate.expectedMultiplier <= 2) {
                    continue;
                }
                if (estimate.relationshipType === 'many-to-many') {
                    recommendations.push(`Avoid many-to-many JOIN ${estimate.fromTable} -> ${estimate.toTable} (${estimate.expectedMultiplier}x)`);
                }
                else if (this.isLookupTable(estimate.toTable)) {
                    recommendations.push(`Avoid JOIN to lookup table ${estimate.toTable} (${estimate.expectedMultiplier}x)`);
                }
            }
            // Add auto-DISTINCT recommendation
            if (this.config.autoDistinct && queryType === 'count') {
                recommendations.push('Consider using COUNT(DISTINCT primary_key) instead of COUNT(*)');
            }
        }
        if (this.config.debugMode) {
            logger.info(`Cardinality validation: ${joins.length} JOINs, ${totalMultiplier}x multiplier, valid: ${isValid}`);
            if (isUserRequestedGroupBy) {
                logger.debug(`Allowing user-requested GROUP BY despite ${totalMultiplier}x multiplier`);
            }
            for (const est of estimates) {
                logger.debug(` ${est.fromTable} -> ${est.toTable}: ${est.expectedMultiplier}x (${est.relationshipType})`);
            }
        }
        const fallbackSQL = !isValid && this.config.fallbackEnabled
            ? this.generateFallbackSQL(joins, queryType, this.extractProjectId(queryContext), groupByFields)
            : undefined;
        return {
            isValid,
            totalMultiplier,
            estimates,
            recommendations,
            fallbackSQL
        };
    }
    /**
     * Estimate cardinality for a single JOIN.
     *
     * @param {{fromTable: string, toTable: string}} join
     * @returns {{fromTable: string, toTable: string, estimatedFromRows: number,
     *   estimatedToRows: number, relationshipType: string,
     *   expectedMultiplier: number, confidence: number}} expectedMultiplier is
     *   rounded to two decimals.
     */
    estimateJoinCardinality(join) {
        const fromStats = this.getTableStats(join.fromTable);
        const toStats = this.getTableStats(join.toTable);
        // Determine relationship type based on table characteristics
        const relationshipType = this.determineRelationshipType(join.fromTable, join.toTable);
        let expectedMultiplier;
        let confidence;
        switch (relationshipType) {
            case 'one-to-one':
                expectedMultiplier = 1;
                confidence = 0.9;
                break;
            case 'one-to-many': {
                // Average number of related records, clamped to [1, 10].
                const ratio = toStats.estimatedRows / fromStats.estimatedRows;
                // 0 / 0 yields NaN, which would pass through Math.min/Math.max
                // and make every later comparison against the limit false.
                const safeRatio = Number.isNaN(ratio) ? 1 : ratio;
                expectedMultiplier = Math.max(Math.min(safeRatio, 10), 1); // Never less than 1
                confidence = 0.7;
                break;
            }
            case 'many-to-many':
                // Most dangerous - can create massive explosions
                expectedMultiplier = Math.sqrt(fromStats.estimatedRows * toStats.estimatedRows) / 100;
                expectedMultiplier = Math.min(expectedMultiplier, 1000); // Cap at 1000x
                confidence = 0.5; // Low confidence due to high variability
                break;
            default:
                // Unknown relationship type (e.g. from an overriding subclass):
                // use a neutral multiplier instead of leaving it undefined.
                expectedMultiplier = 1;
                confidence = 0.5;
                break;
        }
        // Apply JOIN selectivity factor (the smaller of the two tables' values)
        const selectivity = Math.min(fromStats.averageJoinSelectivity, toStats.averageJoinSelectivity);
        expectedMultiplier *= selectivity;
        // Special handling for known dangerous patterns
        if (this.isDangerousJoinPattern(join.fromTable, join.toTable)) {
            expectedMultiplier *= 10; // Multiply by danger factor
            confidence *= 0.5; // Reduce confidence
        }
        return {
            fromTable: join.fromTable,
            toTable: join.toTable,
            estimatedFromRows: fromStats.estimatedRows,
            estimatedToRows: toStats.estimatedRows,
            relationshipType,
            expectedMultiplier: Math.round(expectedMultiplier * 100) / 100, // Round to 2 decimals
            confidence
        };
    }
    /**
     * Calculate total multiplication factor for all JOINs.
     *
     * @param {Array<{expectedMultiplier: number}>} estimates
     * @returns {number} Product of all expectedMultiplier values; 1 for an empty list.
     */
    calculateTotalMultiplier(estimates) {
        return estimates.reduce((total, estimate) => total * estimate.expectedMultiplier, 1);
    }
    /**
     * Get cardinality limit for query type.
     *
     * @param {string} queryType
     * @returns {number} The configured limit; unrecognized types get the groupBy limit.
     */
    getLimitForQueryType(queryType) {
        const { limits } = this.config;
        switch (queryType) {
            case 'count':
                return limits.simpleCount;
            case 'groupBy':
                return limits.groupByCount;
            case 'crossEntity':
                return limits.crossEntity;
            case 'detail':
                return limits.detail;
            default:
                return limits.groupByCount; // Safe default
        }
    }
    /**
     * Determine relationship type between two tables.
     *
     * @param {string} fromTable
     * @param {string} toTable
     * @returns {string} 'many-to-many' for the known pairs (matched in either
     *   direction), otherwise 'one-to-many'.
     */
    determineRelationshipType(fromTable, toTable) {
        // Known one-to-many relationships (direction-sensitive)
        const oneToMany = [
            ['flags', 'flag_environments'],
            ['experiments', 'experiment_pages'],
            ['projects', 'flags'],
            ['projects', 'experiments']
        ];
        if (oneToMany.some(([from, to]) => fromTable === from && toTable === to)) {
            return 'one-to-many';
        }
        // Known many-to-many relationships (dangerous), matched in both directions
        const manyToMany = [
            ['experiments', 'audiences'],
            ['flags', 'audiences'],
            ['experiments', 'pages'],
            ['campaigns', 'experiments']
        ];
        const isManyToMany = manyToMany.some(([from, to]) => (fromTable === from && toTable === to) ||
            (fromTable === to && toTable === from));
        if (isManyToMany) {
            return 'many-to-many';
        }
        // Lookup table JOINs are typically one-to-one but can explode
        if (this.isLookupTable(toTable)) {
            return 'one-to-many'; // Treat as potentially dangerous
        }
        // Default to one-to-many (safer assumption)
        return 'one-to-many';
    }
    /**
     * Check if this is a known dangerous JOIN pattern (direction-sensitive).
     *
     * @param {string} fromTable
     * @param {string} toTable
     * @returns {boolean}
     */
    isDangerousJoinPattern(fromTable, toTable) {
        const dangerousPatterns = [
            ['flag_environments', 'environments'], // THE ROOT CAUSE pattern
            ['experiment_pages', 'pages'],
            ['experiment_audiences', 'audiences'],
            ['flags', 'projects'],
            ['experiments', 'projects']
        ];
        return dangerousPatterns.some(([from, to]) => fromTable === from && toTable === to);
    }
    /**
     * Check if table is a lookup table (typically large and causes explosions).
     *
     * @param {string} tableName
     * @returns {boolean}
     */
    isLookupTable(tableName) {
        const lookupTables = ['environments', 'projects', 'audiences', 'attributes', 'collaborators'];
        return lookupTables.includes(tableName);
    }
    /**
     * Get or estimate table statistics.
     *
     * Entries younger than one hour are returned as stored; otherwise a new
     * entry is built from the static estimates and stored.
     *
     * NOTE(review): an expired entry is overwritten with the static estimates,
     * so values supplied through updateTableStats() or seeded by
     * initializeTableStats() are replaced after an hour. Confirm this is intended.
     *
     * @param {string} tableName
     * @returns {{name: string, estimatedRows: number, averageJoinSelectivity: number,
     *   isLookupTable: boolean, lastUpdated: number}}
     */
    getTableStats(tableName) {
        const existing = this.tableStats.get(tableName);
        if (existing && (Date.now() - existing.lastUpdated) < 3600000) { // 1 hour cache
            return existing;
        }
        // Create estimated stats if not found (or expired)
        const stats = {
            name: tableName,
            estimatedRows: this.estimateTableSize(tableName),
            averageJoinSelectivity: this.estimateJoinSelectivity(tableName),
            isLookupTable: this.isLookupTable(tableName),
            lastUpdated: Date.now()
        };
        this.tableStats.set(tableName, stats);
        return stats;
    }
    /**
     * Estimate table size based on table type.
     *
     * @param {string} tableName
     * @returns {number} Hard-coded row estimate, or 100 for unknown tables.
     */
    estimateTableSize(tableName) {
        const sizeEstimates = {
            // Junction tables (typically moderate size)
            'flag_environments': 1000,
            'experiment_pages': 500,
            'experiment_audiences': 200,
            // Entity tables (typically smaller)
            'flags': 500,
            'experiments': 100,
            'features': 50,
            'pages': 200,
            'campaigns': 50,
            // Lookup tables (DANGEROUS - large)
            'environments': 1000, // The 700x multiplier source
            'projects': 100,
            'audiences': 300,
            'attributes': 200,
            'collaborators': 50
        };
        // Own-property check so names like 'constructor' do not resolve to
        // inherited Object.prototype members.
        return Object.hasOwn(sizeEstimates, tableName)
            ? sizeEstimates[tableName]
            : 100; // Default conservative estimate
    }
    /**
     * Estimate JOIN selectivity (how much JOINs typically filter).
     *
     * @param {string} tableName
     * @returns {number} 0.8 for lookup tables, 0.6 otherwise.
     */
    estimateJoinSelectivity(tableName) {
        if (this.isLookupTable(tableName)) {
            return 0.8; // Lookup tables have high selectivity (80% of rows match)
        }
        return 0.6; // Default 60% selectivity for normal tables
    }
    /**
     * Extract project_id from query context.
     *
     * Looks for the first where-condition whose field (ignoring any
     * `table.` prefix) is `project_id` and whose operator is `=`.
     *
     * @param {object} [queryContext]
     * @returns {string|undefined} The condition value as a string, or undefined.
     */
    extractProjectId(queryContext) {
        if (!queryContext?.whereConditions) {
            return undefined;
        }
        const projectCondition = queryContext.whereConditions.find((condition) => {
            // Skip malformed conditions instead of throwing on a non-string field.
            if (typeof condition?.field !== 'string') {
                return false;
            }
            const fieldName = condition.field.split('.').pop();
            return fieldName === 'project_id' && condition.operator === '=';
        });
        return projectCondition ? String(projectCondition.value) : undefined;
    }
    /**
     * Generate fallback SQL when JOINs are too dangerous.
     *
     * The fallback queries only the first JOIN's fromTable ('flags' if there
     * are no JOINs) and never contains a JOIN.
     *
     * NOTE(review): table and GROUP BY field names are interpolated as-is;
     * they are assumed to come from trusted schema metadata - confirm at the
     * call sites.
     *
     * @param {Array<{fromTable: string}>} joins
     * @param {string} queryType
     * @param {string} [projectId] - Embedded as a quoted SQL string literal.
     * @param {string[]} [groupByFields]
     * @returns {string} SQL text.
     */
    generateFallbackSQL(joins, queryType, projectId, groupByFields) {
        const primaryTable = joins[0]?.fromTable || 'flags';
        // Build WHERE clause for project filtering. Single quotes are doubled
        // so the value cannot terminate the string literal (SQL injection).
        let whereClause = '';
        if (projectId) {
            const escapedProjectId = String(projectId).replace(/'/g, "''");
            whereClause = `WHERE project_id = '${escapedProjectId}'`;
        }
        const simpleCountSQL = `SELECT COUNT(*) as count FROM ${primaryTable} ${whereClause}`.trim();
        // L2-1 FIX: Preserve GROUP BY for groupBy queries
        if (queryType === 'groupBy' && groupByFields && groupByFields.length > 0) {
            logger.debug('Generating fallback SQL for GROUP BY query');
            logger.debug(`GROUP BY fields: ${groupByFields.join(', ')}`);
            // Qualified fields (table.column) may reference joined tables, which
            // the JOIN-free fallback cannot resolve.
            const referencesOtherTables = groupByFields.some((field) => field.includes('.'));
            if (referencesOtherTables) {
                logger.debug('GROUP BY references joined tables, fallback to simple count');
                return simpleCountSQL;
            }
            // Build safe GROUP BY query without dangerous JOINs
            const selectFields = [...groupByFields, 'COUNT(*) as count'];
            const groupByClause = `GROUP BY ${groupByFields.join(', ')}`;
            return `SELECT ${selectFields.join(', ')} FROM ${primaryTable} ${whereClause} ${groupByClause}`.trim();
        }
        // For simple count queries, generate simple COUNT without JOINs
        if (queryType === 'count') {
            return simpleCountSQL;
        }
        // For other queries, use basic SELECT
        return `SELECT * FROM ${primaryTable} ${whereClause} LIMIT 100`.trim();
    }
    /**
     * Inject DISTINCT into COUNT queries when multiplication detected.
     *
     * @param {string} sql
     * @param {number} multiplier - SQL is returned unchanged when this is <= 1.5
     *   or when autoDistinct is disabled.
     * @param {string} [primaryTable] - Defaults to 'flags'.
     * @returns {string} SQL with every COUNT(*) replaced by COUNT(DISTINCT <primary key>),
     *   or the original SQL when nothing applies.
     */
    injectDistinctIfNeeded(sql, multiplier, primaryTable) {
        if (!this.config.autoDistinct || multiplier <= 1.5) {
            return sql;
        }
        // Replace COUNT(*) with COUNT(DISTINCT primary_key)
        if (!/COUNT\s*\(\s*\*\s*\)/i.test(sql)) {
            return sql;
        }
        const primaryKey = this.getPrimaryKeyForTable(primaryTable || 'flags');
        const distinctCount = `COUNT(DISTINCT ${primaryKey})`;
        const modifiedSQL = sql.replace(/COUNT\s*\(\s*\*\s*\)/gi, distinctCount);
        if (this.config.debugMode) {
            logger.info(`Injected DISTINCT: COUNT(*) → ${distinctCount} (multiplier: ${multiplier}x)`);
        }
        return modifiedSQL;
    }
    /**
     * Get primary key field for a table.
     *
     * @param {string} tableName
     * @returns {string} Qualified key column; `<tableName>.id` for unknown tables.
     */
    getPrimaryKeyForTable(tableName) {
        const primaryKeys = {
            'flags': 'flags.key',
            'experiments': 'experiments.id',
            'features': 'features.id',
            'pages': 'pages.id',
            'audiences': 'audiences.id',
            'flag_environments': 'flag_environments.flag_key',
            'experiment_pages': 'experiment_pages.experiment_id'
        };
        // Own-property check so inherited Object.prototype members are never returned.
        return Object.hasOwn(primaryKeys, tableName)
            ? primaryKeys[tableName]
            : `${tableName}.id`;
    }
    /**
     * Initialize default table statistics.
     *
     * Seeds tableStats with hard-coded values for six tables.
     */
    initializeTableStats() {
        const defaultStats = [
            { name: 'flags', rows: 500, selectivity: 0.7, isLookup: false },
            { name: 'flag_environments', rows: 1000, selectivity: 0.8, isLookup: false },
            { name: 'environments', rows: 1000, selectivity: 0.9, isLookup: true }, // DANGER
            { name: 'experiments', rows: 100, selectivity: 0.6, isLookup: false },
            { name: 'audiences', rows: 300, selectivity: 0.8, isLookup: true },
            { name: 'projects', rows: 100, selectivity: 0.9, isLookup: true }
        ];
        for (const { name, rows, selectivity, isLookup } of defaultStats) {
            this.tableStats.set(name, {
                name,
                estimatedRows: rows,
                averageJoinSelectivity: selectivity,
                isLookupTable: isLookup,
                lastUpdated: Date.now()
            });
        }
        logger.info(`Initialized table statistics for ${defaultStats.length} tables`);
    }
    /**
     * Update table statistics with real data.
     *
     * @param {string} tableName
     * @param {number} actualRows
     * @param {number} [joinSelectivity] - When null/undefined, the previously
     *   stored selectivity is kept, or 0.7 if there is none. An explicit 0 is
     *   stored as given.
     */
    updateTableStats(tableName, actualRows, joinSelectivity) {
        const existing = this.tableStats.get(tableName);
        this.tableStats.set(tableName, {
            name: tableName,
            estimatedRows: actualRows,
            averageJoinSelectivity: joinSelectivity ?? existing?.averageJoinSelectivity ?? 0.7,
            isLookupTable: existing?.isLookupTable || this.isLookupTable(tableName),
            lastUpdated: Date.now()
        });
        logger.info(`Updated table stats for ${tableName}: ${actualRows} rows`);
    }
    /**
     * Get statistics for monitoring and debugging.
     *
     * Placeholder: returns fixed values; this class does not record any
     * usage counters.
     *
     * @returns {{totalValidations: number, rejectedJoins: number,
     *   averageMultiplier: number, topDangerousPatterns: Array}}
     */
    getStatistics() {
        // This would track actual usage statistics in a real implementation
        return {
            totalValidations: 0,
            rejectedJoins: 0,
            averageMultiplier: 1.0,
            topDangerousPatterns: []
        };
    }
}
//# sourceMappingURL=CardinalityGuard.js.map