@simonecoelhosfo/optimizely-mcp-server
Version:
Optimizely MCP Server for AI assistants with integrated CLI tools
395 lines • 17.1 kB
JavaScript
/**
* JOIN Cardinality Guard - Prevent Query Explosion
*
* CRITICAL COMPONENT: Prevents Cartesian product explosions by:
* 1. Estimating JOIN multiplication factors
* 2. Enforcing cardinality limits based on query type
* 3. Automatically injecting DISTINCT when necessary
* 4. Providing fallback strategies for dangerous JOINs
*
* Created: July 5, 2025
* Purpose: Second line of defense against 700-1400x count inflation
*/
import { getLogger } from '../../logging/Logger.js';
// Module-level logger obtained from the project's logging module; used by the class below for info/debug output.
const logger = getLogger();
/**
 * Guards against JOIN-induced row multiplication.
 *
 * Estimates how much each JOIN multiplies the row count, compares the product
 * against a per-query-type limit, and offers mitigations (recommendations,
 * JOIN-free fallback SQL, COUNT(DISTINCT ...) injection).
 */
export class CardinalityGuard {
    /** Default multiplier limits per query type. */
    static #DEFAULT_LIMITS = Object.freeze({
        simpleCount: 1, // COUNT(*) should never multiply
        groupByCount: 2, // GROUP BY COUNT can multiply by 2x max
        crossEntity: 5, // Cross-entity queries up to 5x
        detail: 10, // Detail queries up to 10x
        maxExecutionTime: 5000 // 5 second timeout
    });
    /** How long a cached table-statistics entry is considered fresh (1 hour). */
    static #STATS_TTL_MS = 3600000;
    /** Tables treated as lookup tables. */
    static #LOOKUP_TABLES = new Set(['environments', 'projects', 'audiences', 'attributes', 'collaborators']);
    /** Known one-to-many relationships, matched in the listed direction only. */
    static #ONE_TO_MANY = [
        ['flags', 'flag_environments'],
        ['experiments', 'experiment_pages'],
        ['projects', 'flags'],
        ['projects', 'experiments']
    ];
    /** Known many-to-many relationships, matched in either direction. */
    static #MANY_TO_MANY = [
        ['experiments', 'audiences'],
        ['flags', 'audiences'],
        ['experiments', 'pages'],
        ['campaigns', 'experiments']
    ];
    /** JOIN directions that receive the extra 10x danger factor. */
    static #DANGEROUS_PATTERNS = [
        ['flag_environments', 'environments'], // THE ROOT CAUSE pattern
        ['experiment_pages', 'pages'],
        ['experiment_audiences', 'audiences'],
        ['flags', 'projects'],
        ['experiments', 'projects']
    ];
    /** Built-in row-count guesses; a Map so inherited Object keys can never match. */
    static #SIZE_ESTIMATES = new Map([
        // Junction tables (typically moderate size)
        ['flag_environments', 1000],
        ['experiment_pages', 500],
        ['experiment_audiences', 200],
        // Entity tables (typically smaller)
        ['flags', 500],
        ['experiments', 100],
        ['features', 50],
        ['pages', 200],
        ['campaigns', 50],
        // Lookup tables
        ['environments', 1000], // The 700x multiplier source
        ['projects', 100],
        ['audiences', 300],
        ['attributes', 200],
        ['collaborators', 50]
    ]);
    /** Column used for COUNT(DISTINCT ...) per table; a Map for the same reason as above. */
    static #PRIMARY_KEYS = new Map([
        ['flags', 'flags.key'],
        ['experiments', 'experiments.id'],
        ['features', 'features.id'],
        ['pages', 'pages.id'],
        ['audiences', 'audiences.id'],
        ['flag_environments', 'flag_environments.flag_key'],
        ['experiment_pages', 'experiment_pages.experiment_id']
    ]);

    /** Effective configuration: defaults merged with the caller's overrides. */
    config;
    /** Table statistics cache, keyed by table name. */
    tableStats = new Map();
    /** Intended for historical multipliers; nothing in this class writes to it yet. */
    joinHistograms = new Map();

    /**
     * @param {object} [config] Overrides for `enabled`, `limits`, `autoDistinct`,
     *   `fallbackEnabled` and `debugMode`. `limits` is merged key-by-key with the
     *   defaults, so a partial override keeps the remaining default limits.
     */
    constructor(config = {}) {
        const overrides = config ?? {};
        this.config = {
            enabled: true,
            autoDistinct: true,
            fallbackEnabled: true,
            debugMode: true,
            ...overrides,
            // Merge limits separately: a plain spread would replace the whole
            // object and leave unspecified limits undefined.
            limits: { ...CardinalityGuard.#DEFAULT_LIMITS, ...overrides.limits }
        };
        this.initializeTableStats();
        logger.info(`CardinalityGuard initialized with limits: ${JSON.stringify(this.config.limits)}`);
    }

    /**
     * Validate if a set of JOINs is safe based on cardinality estimation.
     *
     * @param {Array<{fromTable: string, toTable: string}>} joins JOINs to evaluate; null/undefined is treated as no JOINs.
     * @param {string} queryType 'count', 'groupBy', 'crossEntity' or 'detail'.
     * @param {object} [queryContext] Optional; `groupByFields` and `whereConditions` are read from it.
     * @returns {{isValid: boolean, totalMultiplier: number, estimates: object[], recommendations: string[], fallbackSQL: (string|undefined)}}
     */
    validateJoinCardinality(joins, queryType, queryContext) {
        if (!this.config.enabled) {
            return { isValid: true, totalMultiplier: 1, estimates: [], recommendations: [] };
        }
        const joinList = joins ?? [];
        const estimates = joinList.map((join) => this.estimateJoinCardinality(join));
        const totalMultiplier = this.calculateTotalMultiplier(estimates);
        const limit = this.getLimitForQueryType(queryType);
        const groupByFields = queryContext?.groupByFields;
        // L2-1 FIX: a GROUP BY the user explicitly asked for is allowed even
        // when it exceeds the normal limit.
        const isUserRequestedGroupBy = queryType === 'groupBy' && (groupByFields?.length ?? 0) > 0;
        const isValid = isUserRequestedGroupBy || totalMultiplier <= limit;
        const recommendations = [];
        if (!isValid) {
            recommendations.push(`Total multiplier ${totalMultiplier}x exceeds limit ${limit}x for ${queryType} queries`);
            // Point at the individual JOINs responsible for most of the growth.
            for (const estimate of estimates) {
                if (estimate.expectedMultiplier <= 2) {
                    continue;
                }
                if (estimate.relationshipType === 'many-to-many') {
                    recommendations.push(`Avoid many-to-many JOIN ${estimate.fromTable} → ${estimate.toTable} (${estimate.expectedMultiplier}x)`);
                }
                else if (this.isLookupTable(estimate.toTable)) {
                    recommendations.push(`Avoid JOIN to lookup table ${estimate.toTable} (${estimate.expectedMultiplier}x)`);
                }
            }
            if (this.config.autoDistinct && queryType === 'count') {
                recommendations.push('Consider using COUNT(DISTINCT primary_key) instead of COUNT(*)');
            }
        }
        if (this.config.debugMode) {
            logger.info(`Cardinality validation: ${joinList.length} JOINs, ${totalMultiplier}x multiplier, valid: ${isValid}`);
            if (isUserRequestedGroupBy) {
                logger.debug(`Allowing user-requested GROUP BY despite ${totalMultiplier}x multiplier`);
            }
            for (const est of estimates) {
                logger.debug(`  ${est.fromTable} → ${est.toTable}: ${est.expectedMultiplier}x (${est.relationshipType})`);
            }
        }
        // Fallback SQL is only produced for rejected queries.
        const fallbackSQL = !isValid && this.config.fallbackEnabled
            ? this.generateFallbackSQL(joinList, queryType, this.extractProjectId(queryContext), groupByFields)
            : undefined;
        return { isValid, totalMultiplier, estimates, recommendations, fallbackSQL };
    }

    /**
     * Estimate cardinality for a single JOIN.
     *
     * @param {{fromTable: string, toTable: string}} join
     * @returns {{fromTable: string, toTable: string, estimatedFromRows: number, estimatedToRows: number, relationshipType: string, expectedMultiplier: number, confidence: number}}
     *   `expectedMultiplier` is rounded to two decimals.
     */
    estimateJoinCardinality(join) {
        const fromStats = this.getTableStats(join.fromTable);
        const toStats = this.getTableStats(join.toTable);
        const relationshipType = this.determineRelationshipType(join.fromTable, join.toTable);
        let expectedMultiplier;
        let confidence;
        switch (relationshipType) {
            case 'one-to-one':
                expectedMultiplier = 1;
                confidence = 0.9;
                break;
            case 'many-to-many':
                // Most dangerous - can create massive explosions; capped at 1000x.
                expectedMultiplier = Math.min(Math.sqrt(fromStats.estimatedRows * toStats.estimatedRows) / 100, 1000);
                confidence = 0.5; // Low confidence due to high variability
                break;
            case 'one-to-many':
            default: {
                // Average number of related rows, clamped to [1, 10].
                const ratio = toStats.estimatedRows / fromStats.estimatedRows;
                // 0 / 0 is NaN, which would poison the total multiplier; use the neutral 1.
                const safeRatio = Number.isNaN(ratio) ? 1 : ratio;
                expectedMultiplier = Math.max(Math.min(safeRatio, 10), 1);
                confidence = 0.7;
                break;
            }
        }
        // Apply JOIN selectivity factor (this can bring the result below 1).
        expectedMultiplier *= Math.min(fromStats.averageJoinSelectivity, toStats.averageJoinSelectivity);
        // Special handling for known dangerous patterns.
        if (this.isDangerousJoinPattern(join.fromTable, join.toTable)) {
            expectedMultiplier *= 10;
            confidence *= 0.5;
        }
        return {
            fromTable: join.fromTable,
            toTable: join.toTable,
            estimatedFromRows: fromStats.estimatedRows,
            estimatedToRows: toStats.estimatedRows,
            relationshipType,
            expectedMultiplier: Math.round(expectedMultiplier * 100) / 100,
            confidence
        };
    }

    /**
     * Calculate total multiplication factor for all JOINs.
     *
     * @param {Array<{expectedMultiplier: number}>} estimates
     * @returns {number} Product of all multipliers; 1 for an empty list.
     */
    calculateTotalMultiplier(estimates) {
        return estimates.reduce((total, estimate) => total * estimate.expectedMultiplier, 1);
    }

    /**
     * Get cardinality limit for query type.
     *
     * @param {string} queryType
     * @returns {number} The configured limit; unknown types use the GROUP BY limit.
     */
    getLimitForQueryType(queryType) {
        const { limits } = this.config;
        switch (queryType) {
            case 'count':
                return limits.simpleCount;
            case 'crossEntity':
                return limits.crossEntity;
            case 'detail':
                return limits.detail;
            case 'groupBy':
            default:
                return limits.groupByCount;
        }
    }

    /**
     * Determine relationship type between two tables.
     *
     * @param {string} fromTable
     * @param {string} toTable
     * @returns {string} 'many-to-many' for a known pair in either direction, otherwise 'one-to-many'.
     */
    determineRelationshipType(fromTable, toTable) {
        const sameDirection = ([from, to]) => fromTable === from && toTable === to;
        const reverseDirection = ([from, to]) => fromTable === to && toTable === from;
        if (CardinalityGuard.#ONE_TO_MANY.some(sameDirection)) {
            return 'one-to-many';
        }
        if (CardinalityGuard.#MANY_TO_MANY.some((pair) => sameDirection(pair) || reverseDirection(pair))) {
            return 'many-to-many';
        }
        // Lookup-table JOINs and unknown pairs are both treated as one-to-many.
        return 'one-to-many';
    }

    /**
     * Check if this is a known dangerous JOIN pattern (direction-sensitive).
     *
     * @param {string} fromTable
     * @param {string} toTable
     * @returns {boolean}
     */
    isDangerousJoinPattern(fromTable, toTable) {
        return CardinalityGuard.#DANGEROUS_PATTERNS.some(([from, to]) => fromTable === from && toTable === to);
    }

    /**
     * Check if table is one of the known lookup tables.
     *
     * @param {string} tableName
     * @returns {boolean}
     */
    isLookupTable(tableName) {
        return CardinalityGuard.#LOOKUP_TABLES.has(tableName);
    }

    /**
     * Get or estimate table statistics.
     *
     * A cached entry younger than one hour is returned as-is. Otherwise a new
     * entry is built from the built-in estimates, cached and returned.
     * NOTE(review): an expired entry written by updateTableStats is therefore
     * replaced by built-in guesses - confirm this is intended.
     *
     * @param {string} tableName
     * @returns {{name: string, estimatedRows: number, averageJoinSelectivity: number, isLookupTable: boolean, lastUpdated: number}}
     */
    getTableStats(tableName) {
        const cached = this.tableStats.get(tableName);
        if (cached && (Date.now() - cached.lastUpdated) < CardinalityGuard.#STATS_TTL_MS) {
            return cached;
        }
        const stats = {
            name: tableName,
            estimatedRows: this.estimateTableSize(tableName),
            averageJoinSelectivity: this.estimateJoinSelectivity(tableName),
            isLookupTable: this.isLookupTable(tableName),
            lastUpdated: Date.now()
        };
        this.tableStats.set(tableName, stats);
        return stats;
    }

    /**
     * Estimate table size based on table type.
     *
     * @param {string} tableName
     * @returns {number} Built-in estimate, or 100 for unknown tables.
     */
    estimateTableSize(tableName) {
        return CardinalityGuard.#SIZE_ESTIMATES.get(tableName) || 100; // Default conservative estimate
    }

    /**
     * Estimate JOIN selectivity (how much JOINs typically filter).
     *
     * @param {string} tableName
     * @returns {number} 0.8 for lookup tables, 0.6 otherwise.
     */
    estimateJoinSelectivity(tableName) {
        return this.isLookupTable(tableName) ? 0.8 : 0.6;
    }

    /**
     * Extract project_id from query context.
     *
     * Finds the first `=` condition whose field (ignoring any `table.` prefix)
     * is `project_id`. Conditions without a string `field` are skipped.
     *
     * @param {object} [queryContext] Optional; `whereConditions` is read from it.
     * @returns {string|undefined} The value as a string, or undefined when absent.
     */
    extractProjectId(queryContext) {
        const conditions = queryContext?.whereConditions;
        if (!Array.isArray(conditions)) {
            return undefined;
        }
        const projectCondition = conditions.find((condition) => {
            if (typeof condition?.field !== 'string') {
                return false;
            }
            const fieldName = condition.field.split('.').pop();
            return fieldName === 'project_id' && condition.operator === '=';
        });
        return projectCondition ? String(projectCondition.value) : undefined;
    }

    /**
     * Generate fallback SQL when JOINs are too dangerous.
     *
     * The fallback queries only the first JOIN's source table ('flags' when
     * there are no JOINs), optionally filtered by project.
     * NOTE(review): table and GROUP BY field names are interpolated as-is and
     * are assumed to come from trusted query planning - confirm against callers.
     *
     * @param {Array<{fromTable: string}>} joins
     * @param {string} queryType
     * @param {string} [projectId] Embedded as an escaped SQL string literal.
     * @param {string[]} [groupByFields]
     * @returns {string} JOIN-free SQL statement.
     */
    generateFallbackSQL(joins, queryType, projectId, groupByFields) {
        const primaryTable = joins?.[0]?.fromTable || 'flags';
        let whereClause = '';
        if (projectId) {
            // Double embedded single quotes so the value cannot end the literal early.
            const escapedProjectId = String(projectId).replaceAll("'", "''");
            whereClause = `WHERE project_id = '${escapedProjectId}'`;
        }
        const simpleCountSQL = `SELECT COUNT(*) as count FROM ${primaryTable} ${whereClause}`.trim();
        // L2-1 FIX: Preserve GROUP BY for groupBy queries
        if (queryType === 'groupBy' && groupByFields && groupByFields.length > 0) {
            logger.debug('Generating fallback SQL for GROUP BY query');
            logger.debug(`GROUP BY fields: ${groupByFields.join(', ')}`);
            // A qualified field ("table.column") may belong to a table that is
            // no longer joined, so grouping cannot be kept in that case.
            if (groupByFields.some((field) => field.includes('.'))) {
                logger.debug('GROUP BY references joined tables, fallback to simple count');
                return simpleCountSQL;
            }
            const selectFields = [...groupByFields, 'COUNT(*) as count'];
            const groupByClause = `GROUP BY ${groupByFields.join(', ')}`;
            return `SELECT ${selectFields.join(', ')} FROM ${primaryTable} ${whereClause} ${groupByClause}`.trim();
        }
        if (queryType === 'count') {
            return simpleCountSQL;
        }
        // Any other query type: a capped plain SELECT.
        return `SELECT * FROM ${primaryTable} ${whereClause} LIMIT 100`.trim();
    }

    /**
     * Inject DISTINCT into COUNT queries when multiplication detected.
     *
     * @param {string} sql
     * @param {number} multiplier Estimated total multiplier; values up to 1.5 leave the SQL unchanged.
     * @param {string} [primaryTable] Defaults to 'flags'.
     * @returns {string} SQL with every COUNT(*) replaced by COUNT(DISTINCT <primary key>), or the input unchanged.
     */
    injectDistinctIfNeeded(sql, multiplier, primaryTable) {
        if (!this.config.autoDistinct || multiplier <= 1.5) {
            return sql;
        }
        const countStarPattern = /COUNT\s*\(\s*\*\s*\)/gi;
        // A global regex keeps lastIndex between calls; String#search ignores it.
        if (sql.search(countStarPattern) === -1) {
            return sql;
        }
        const primaryKey = this.getPrimaryKeyForTable(primaryTable || 'flags');
        const distinctCount = `COUNT(DISTINCT ${primaryKey})`;
        // Function replacer: "$" sequences in the key are inserted literally.
        const modifiedSQL = sql.replace(countStarPattern, () => distinctCount);
        if (this.config.debugMode) {
            logger.info(`Injected DISTINCT: COUNT(*) → ${distinctCount} (multiplier: ${multiplier}x)`);
        }
        return modifiedSQL;
    }

    /**
     * Get primary key field for a table.
     *
     * @param {string} tableName
     * @returns {string} Qualified column name; `<tableName>.id` for unknown tables.
     */
    getPrimaryKeyForTable(tableName) {
        return CardinalityGuard.#PRIMARY_KEYS.get(tableName) || `${tableName}.id`;
    }

    /**
     * Initialize default table statistics.
     */
    initializeTableStats() {
        const defaultStats = [
            { name: 'flags', rows: 500, selectivity: 0.7, isLookup: false },
            { name: 'flag_environments', rows: 1000, selectivity: 0.8, isLookup: false },
            { name: 'environments', rows: 1000, selectivity: 0.9, isLookup: true }, // DANGER
            { name: 'experiments', rows: 100, selectivity: 0.6, isLookup: false },
            { name: 'audiences', rows: 300, selectivity: 0.8, isLookup: true },
            { name: 'projects', rows: 100, selectivity: 0.9, isLookup: true }
        ];
        for (const { name, rows, selectivity, isLookup } of defaultStats) {
            this.tableStats.set(name, {
                name,
                estimatedRows: rows,
                averageJoinSelectivity: selectivity,
                isLookupTable: isLookup,
                lastUpdated: Date.now()
            });
        }
        logger.info(`Initialized table statistics for ${defaultStats.length} tables`);
    }

    /**
     * Update table statistics with real data.
     *
     * @param {string} tableName
     * @param {number} actualRows
     * @param {number} [joinSelectivity] Falls back to the existing value, then to 0.7.
     */
    updateTableStats(tableName, actualRows, joinSelectivity) {
        const existing = this.tableStats.get(tableName);
        this.tableStats.set(tableName, {
            name: tableName,
            estimatedRows: actualRows,
            // `||` on purpose: a selectivity of 0 would zero every multiplier it
            // touches and let any JOIN pass, so 0 is treated as "not provided".
            averageJoinSelectivity: joinSelectivity || existing?.averageJoinSelectivity || 0.7,
            isLookupTable: existing?.isLookupTable || this.isLookupTable(tableName),
            lastUpdated: Date.now()
        });
        logger.info(`Updated table stats for ${tableName}: ${actualRows} rows`);
    }

    /**
     * Get statistics for monitoring and debugging.
     *
     * Placeholder: no usage is tracked, so fixed zero values are returned.
     *
     * @returns {{totalValidations: number, rejectedJoins: number, averageMultiplier: number, topDangerousPatterns: Array}}
     */
    getStatistics() {
        return {
            totalValidations: 0,
            rejectedJoins: 0,
            averageMultiplier: 1.0,
            topDangerousPatterns: []
        };
    }
}
//# sourceMappingURL=CardinalityGuard.js.map