UNPKG

@simonecoelhosfo/optimizely-mcp-server

Version:

Optimizely MCP Server for AI assistants with integrated CLI tools

1,025 lines 119 kB
/** * RobustIntentParser - Advanced Natural Language Query Parser * * IMPLEMENTATION STATUS: * 🚧 IN PROGRESS: Core parser implementation * COMPLETE: Basic structure and main methods * TODO: Advanced features (fuzzy matching, context inference) * * Last Updated: July 3, 2025 */ import { findEntities, ENTITY_PATTERNS } from './patterns/EntityPatterns.js'; import { detectAction, getActionPattern } from './patterns/ActionPatterns.js'; import { findFields, getCanonicalFieldName } from './patterns/FieldPatterns.js'; import { applyLanguagePatterns } from './patterns/LanguagePatterns.js'; import { FuzzyMatcher } from './matchers/FuzzyMatcher.js'; import { ContextInferrer } from './matchers/ContextInferrer.js'; import { ConfidenceScorer } from './matchers/ConfidenceScorer.js'; import { PatternCache, getGlobalCache } from './cache/PatternCache.js'; import { IntelligentFieldMapper } from '../IntelligentFieldMapper.js'; import { FieldResolutionAlgorithm } from '../intelligent-query-engine/FieldResolutionStrategy.js'; import { getLogger } from '../../logging/Logger.js'; // NEW IMPORTS FOR DAY 2 INTEGRATION import { QueryDecomposer } from './decomposer/QueryDecomposer.js'; import { ViewOnlyFieldResolver } from './ViewOnlyFieldResolver.js'; export class RobustIntentParser { config; cache; fuzzyMatcher; contextInferrer; confidenceScorer; fieldMapper; fieldResolutionAlgorithm; // NEW PROPERTIES FOR DAY 2 INTEGRATION queryDecomposer; fieldResolver; currentQueryIntent; currentParseResult = {}; currentQuery = ''; /** * Adapter method to convert ViewOnlyFieldResolver response to legacy format */ adaptFieldResolution(resolution) { return { primaryLocation: resolution.viewName, requiresJoin: false, // Views never require joins confidence: 1.0, isInView: true }; } constructor(config = {}) { this.config = { enableCache: true, cacheSize: 1000, confidenceThreshold: 0.5, fuzzyMatchThreshold: 0.7, maxAlternatives: 3, timeout: 1000, ...config }; // Initialize components this.cache = this.config.enableCache ? getGlobalCache() : new PatternCache({ maxSize: 0 }); this.fuzzyMatcher = new FuzzyMatcher({ minSimilarity: this.config.fuzzyMatchThreshold }); this.contextInferrer = new ContextInferrer(); this.confidenceScorer = new ConfidenceScorer(); // Initialize field resolution algorithm (CRITICAL FIX) this.fieldResolutionAlgorithm = new FieldResolutionAlgorithm(); // Initialize new components for Day 2 integration this.queryDecomposer = new QueryDecomposer(); this.fieldResolver = new ViewOnlyFieldResolver(); // Use provided fieldMapper or create empty one if (config.fieldMapper) { this.fieldMapper = config.fieldMapper; } else { // WARNING: Without database connection, fieldMapper won't have schema knowledge! this.fieldMapper = new IntelligentFieldMapper(); } } /** * Main parse method with caching and advanced features - DAY 2 ENHANCED VERSION */ parse(query) { const startTime = performance.now(); // Store for context this.currentQuery = query; // Check cache first if (this.config.enableCache) { const cached = this.cache.get(query); if (cached) { return cached; } } // STEP 1: DECOMPOSE QUERY (NEW!) const decomposed = this.queryDecomposer.decompose(query); // STEP 2: DETECT QUERY INTENT (NEW!) this.currentQueryIntent = { type: decomposed.queryType, confidence: decomposed.confidence, reasoning: [`Detected ${decomposed.queryType} query pattern`] }; // Apply language patterns const { normalized, appliedPatterns } = applyLanguagePatterns(decomposed.normalizedQuery); // Initialize result with intent const result = { originalQuery: query, normalizedQuery: normalized, action: this.mapQueryTypeToAction(decomposed.queryType), primaryEntity: 'flags', relatedEntities: [], fields: [], filters: [], groupBy: [], orderBy: [], aggregations: [], joins: [], confidence: 0, platform: 'both', transformations: [], matchedPatterns: [], // NEW FIELDS: queryIntent: this.currentQueryIntent, fieldLocality: new Map(), aggregationContext: undefined, joinHints: undefined, decomposedQuery: decomposed }; // Set current parse result for context this.currentParseResult = result; // Track language pattern applications if (appliedPatterns.length > 0) { result.transformations.push({ type: 'normalize', from: query, to: normalized, reason: `Applied ${appliedPatterns.length} language patterns` }); } // Extract components WITH INTENT AWARENESS const actionResult = detectAction(result.normalizedQuery); result.action = actionResult.action; const entityResults = this.extractEntitiesWithIntent(result.normalizedQuery, decomposed); result.primaryEntity = entityResults.primary; result.relatedEntities = entityResults.related; result.platform = this.inferPlatform(result.primaryEntity, result.relatedEntities); // L7-11 FIX: Check for invalid cross-platform comparisons if (this.isInvalidCrossPlatformComparison(result.normalizedQuery, entityResults)) { getLogger().warn('Invalid cross-platform comparison detected - returning error result'); return { ...result, action: 'error', fields: [], filters: [], groupBy: [], orderBy: [], aggregations: [], error: 'Cannot compare audiences or attributes across different platforms. These entities are platform-specific and not compatible between Web Experimentation and Feature Experimentation.' }; } // CRITICAL: Extract fields WITH LOCALITY AWARENESS // L6-6 FIX: For COUNT queries, don't extract regular fields - only GROUP BY fields matter if (decomposed.queryType === 'count') { result.fields = []; // COUNT queries don't need regular field selection } else { result.fields = this.extractFieldsWithLocality(result.normalizedQuery, result.primaryEntity, decomposed.queryType); } result.filters = this.extractFiltersWithLocality(result.normalizedQuery, result.primaryEntity, decomposed.queryType); result.groupBy = this.extractGroupByWithLocality(result.normalizedQuery, result.primaryEntity, decomposed.queryType); result.orderBy = this.extractOrderBy(result.normalizedQuery); result.aggregations = this.extractAggregations(result.normalizedQuery, result.action); result.timeRange = this.extractTimeRange(result.normalizedQuery); result.limit = this.extractLimit(result.normalizedQuery); // BUILD AGGREGATION CONTEXT (NEW!) result.aggregationContext = this.buildAggregationContext(result, decomposed); // CRITICAL L6-7 FIX: For "list with aggregations" queries, ensure we have GROUP BY if (decomposed.queryType === 'list' && result.aggregations.length > 0) { // If we have aggregations in a list query, we need to group by the entity key fields if (result.groupBy.length === 0) { // Add primary entity key fields to GROUP BY const entitySchema = this.fieldMapper.getEntitySchema(result.primaryEntity); if (entitySchema) { const keyFields = ['key', 'name']; // Common identifier fields for (const field of keyFields) { if (entitySchema.columns.includes(field)) { const tableName = entitySchema.table || result.primaryEntity; result.groupBy.push(`${tableName}.${field}`); } } } } // Also ensure we select these fields if (result.fields.length === 0 || result.fields[0] === '*') { const entitySchema = this.fieldMapper.getEntitySchema(result.primaryEntity); if (entitySchema) { result.fields = []; const keyFields = ['key', 'name']; // Common identifier fields for (const field of keyFields) { if (entitySchema.columns.includes(field)) { result.fields.push(field); } } } } } // ANALYZE JOIN NECESSITY (NEW!) result.joinHints = this.analyzeJoinNecessity(result, decomposed); // L7-16 FIX: Check if we need experiment_results JOIN for visitor/performance data if (result.primaryEntity === 'experiments') { const visitorFields = ['visitors', 'visitor_count', 'total_visitors', 'conversion_rate', 'conversions', 'confidence', 'confidence_level', 'lift', 'statistical_significance', 'winner']; const needsResultsJoin = result.filters.some(f => visitorFields.includes(f.field)) || result.fields.some(f => visitorFields.includes(f)); if (needsResultsJoin && !result.relatedEntities.includes('experiment_results')) { // L7-16: Auto-adding experiment_results entity for visitor/performance data result.relatedEntities.push('experiment_results'); } } // Apply context inference with intent awareness const context = this.contextInferrer.inferContext(result.normalizedQuery, result.primaryEntity, result.relatedEntities, result.filters, result.fields); // Merge inferred data result.platform = context.platform; result.relatedEntities = [...new Set([...result.relatedEntities, ...context.impliedEntities])]; result.filters = [...result.filters, ...context.impliedFilters]; // CRITICAL: Override JOINs based on hints for COUNT inflation prevention const extractedJoins = this.extractJoins(result.normalizedQuery, result.primaryEntity, result.relatedEntities); // 🚨 CRITICAL FIX: Merge context.impliedJoins with extractedJoins const allJoins = [...extractedJoins, ...context.impliedJoins]; if (result.joinHints) { const joinHints = result.joinHints; const toExclude = [ ...(joinHints.prohibitedJoins || []), ...(joinHints.unnecessaryJoins || []) // 🚨 CRITICAL: Also exclude unnecessary JOINs ]; if (toExclude.length > 0) { result.joins = allJoins.filter(join => !toExclude.includes(join.entity)); getLogger().info({ originalJoins: allJoins.length, filteredJoins: result.joins.length, excludedEntities: toExclude, queryType: result.queryIntent?.type, message: 'Filtered JOINs based on enhanced parser hints to prevent COUNT inflation' }); } else { result.joins = allJoins; } } else { result.joins = allJoins; } // Calculate detailed confidence const confidenceBreakdown = this.confidenceScorer.calculateConfidence(result); result.confidence = confidenceBreakdown.overall; // Generate alternatives if confidence is low if (result.confidence < this.config.confidenceThreshold && this.config.maxAlternatives > 0) { result.alternatives = this.confidenceScorer.generateAlternatives(result, this.config.maxAlternatives); } // Add parse time to transformations const parseTime = performance.now() - startTime; result.transformations.push({ type: 'normalize', from: query, to: result.normalizedQuery, reason: `Parsed in ${parseTime.toFixed(2)}ms with intent-aware parsing` }); // Cache the result if (this.config.enableCache) { this.cache.set(query, result, parseTime); } return result; } /** * Convert ParseResult to QueryIntent format */ toQueryIntent(result) { return { action: result.action, primaryEntity: result.primaryEntity, relatedEntities: result.relatedEntities.length > 0 ? result.relatedEntities : undefined, metrics: this.extractMetricsFromResult(result), filters: result.filters.map(f => ({ field: f.field, operator: f.operator, value: f.value, type: f.isJsonPath ? 'json' : 'property' })), groupBy: result.groupBy.length > 0 ? result.groupBy : undefined, orderBy: result.orderBy.length > 0 ? result.orderBy.map(o => ({ field: o.field, direction: o.direction.toLowerCase() })) : undefined, timeRange: result.timeRange, limit: result.limit, aggregations: result.aggregations.length > 0 ? result.aggregations.map(a => ({ field: a.field, type: a.function, alias: a.alias })) : undefined }; } /** * Convert ParseResult to UniversalQuery format */ toUniversalQuery(result) { // L7-11 FIX: Handle error action - return a special error query if (result.action === 'error') { return { find: 'error', // Special case for error handling platform: 'both', error: result.error || 'Invalid query', select: [] }; } const universalQuery = { find: result.primaryEntity, platform: result.platform }; // Build select fields if (result.fields.length > 0) { universalQuery.select = result.fields; } else { // Use entity-aware default fields from action pattern const actionPattern = getActionPattern(result.action, result.primaryEntity); universalQuery.select = actionPattern.defaultFields; } // Build where conditions const whereConditions = []; if (result.filters.length > 0) { // Apply locality resolution to filters and add required JOINs result.filters.forEach(filter => { let locality; try { const resolution = this.fieldResolver.resolve(filter.field); locality = this.adaptFieldResolution(resolution); } catch (error) { // Field not in views, skip it getLogger().warn(`Field '${filter.field}' not available in views, skipping filter`); return; } // If field is in a different table, add required JOIN to relatedEntities const primaryTable = this.getTableName(result.primaryEntity); const isInDifferentTable = locality.primaryLocation !== primaryTable; if (isInDifferentTable) { const joinEntity = this.getEntityFromTable(locality.primaryLocation); if (joinEntity && !result.relatedEntities.includes(joinEntity)) { result.relatedEntities.push(joinEntity); getLogger().debug(`Phase 1 Filter JOIN Fix: Added ${joinEntity} for filter ${filter.field}`); // Force re-extraction of JOINs to include the new related entity const newJoins = this.extractJoins(result.normalizedQuery, result.primaryEntity, result.relatedEntities); // Add missing JOINs that aren't already in result.joins newJoins.forEach(newJoin => { const existingJoin = result.joins.find(j => j.entity === newJoin.entity); if (!existingJoin) { result.joins.push(newJoin); getLogger().debug(`Phase 1 Filter JOIN Fix: Added JOIN for ${newJoin.entity}`); } }); } } }); result.filters.forEach((f, index) => { whereConditions.push({ field: f.field, operator: f.operator, value: f.value, type: index === 0 ? undefined : 'AND' // First condition doesn't need type }); }); } // L7-16 FIX: Handle timeRange extraction to WHERE conditions if (result.timeRange) { getLogger().debug({ timeRange: result.timeRange }, 'L7-16: Converting timeRange to WHERE condition'); // Determine the appropriate date field based on entity let dateField = 'created_time'; // default if (result.primaryEntity === 'experiments') { // For experiments, could be created_time or start_time dateField = 'created_time'; } else if (result.primaryEntity === 'flags') { dateField = 'created_time'; } // Convert relative time to SQL condition if (result.timeRange.relative) { // Handle patterns like "last 7 days" const match = result.timeRange.relative.match(/last\s+(\d+)\s+days?/i); if (match) { const days = parseInt(match[1]); whereConditions.push({ field: dateField, operator: '>=', value: `LAST_${days}_DAYS`, type: whereConditions.length > 0 ? 'AND' : undefined }); getLogger().debug(`L7-16: Added temporal filter: ${dateField} >= LAST_${days}_DAYS`); } } } if (whereConditions.length > 0) { universalQuery.where = whereConditions; } // Build joins if (result.joins.length > 0) { universalQuery.joins = result.joins.map(j => { return { type: j.type, entity: j.entity, on: { leftField: j.on.leftField, rightField: j.on.rightField } }; }); } // Build aggregations if (result.aggregations.length > 0) { universalQuery.aggregations = result.aggregations.map(a => ({ field: a.field, function: a.function, alias: a.alias || `${a.function}_${a.field}` })); } // Add remaining fields if (result.groupBy.length > 0) universalQuery.groupBy = result.groupBy; if (result.orderBy.length > 0) universalQuery.orderBy = result.orderBy; if (result.limit) universalQuery.limit = result.limit; if (result.offset) universalQuery.offset = result.offset; // 🚨 CRITICAL: Include enhanced parser hints for COUNT inflation prevention if (result.queryIntent || result.fieldLocality || result.joinHints || result.aggregationContext || result.decomposedQuery) { universalQuery.hints = { ...universalQuery.hints, enhancedHints: { queryIntent: result.queryIntent, fieldLocality: result.fieldLocality, joinHints: result.joinHints, aggregationContext: result.aggregationContext, decomposedQuery: result.decomposedQuery } }; getLogger().debug({ hasQueryIntent: !!result.queryIntent, hasFieldLocality: !!result.fieldLocality, hasJoinHints: !!result.joinHints, hasAggregationContext: !!result.aggregationContext, message: 'Enhanced hints included in UniversalQuery.hints.enhancedHints' }); } else { getLogger().warn('No enhanced hints found in ParseResult - COUNT inflation prevention may not work'); } return universalQuery; } /** * 🚧 Normalize query for parsing */ normalizeQuery(query) { let normalized = query.toLowerCase().trim(); // Expand common contractions normalized = normalized .replace(/what's/g, 'what is') .replace(/where's/g, 'where is') .replace(/that's/g, 'that is') .replace(/it's/g, 'it is') .replace(/i'm/g, 'i am') .replace(/don't/g, 'do not') .replace(/doesn't/g, 'does not') .replace(/haven't/g, 'have not') .replace(/hasn't/g, 'has not'); // Normalize spacing normalized = normalized .replace(/\s+/g, ' ') .replace(/\s*,\s*/g, ', ') .replace(/\s*\.\s*/g, '. '); return normalized; } /** * Extract entities from query with fuzzy matching */ extractEntities(query) { const entityMatches = findEntities(query); // If no exact matches, try fuzzy matching if (entityMatches.length === 0) { // CRITICAL FIX: Define stop words to prevent logical connectors from being treated as entities const STOP_WORDS = new Set([ 'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'from', 'as', 'is', 'was', 'are', 'been', 'their', 'them', 'they', 'this', 'that', 'these', 'those', 'my', 'your', 'his', 'her', 'its', 'our', 'their', 'what', 'which', 'who', 'whom', 'whose', 'where', 'when', 'why', 'how', 'all', 'each', 'every', 'some', 'few', 'more', 'most', 'other', 'such', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 'just', 'being' ]); const words = query.toLowerCase().split(/\s+/); for (const word of words) { // CRITICAL FIX: Skip stop words before fuzzy matching if (STOP_WORDS.has(word)) { continue; // Skip stop words like "and", "or", "with", etc. } const fuzzyMatch = this.fuzzyMatcher.findEntity(word); if (fuzzyMatch && fuzzyMatch.confidence >= this.config.fuzzyMatchThreshold) { entityMatches.push({ entity: fuzzyMatch.matched, confidence: fuzzyMatch.confidence, match: fuzzyMatch.original }); } } } if (entityMatches.length === 0) { return { primary: 'flags', related: [] }; // Default } // ENHANCED: Intelligent entity disambiguation when multiple high-confidence matches exist const topMatches = entityMatches.filter(m => m.confidence >= 0.9); let primary; if (topMatches.length === 1) { // Clear winner primary = topMatches[0].entity; } else if (topMatches.length > 1) { // Need disambiguation - prefer entities that appear in meaningful context getLogger().debug({ query: this.normalizeQuery(query), topMatches: topMatches.map(m => ({ entity: m.entity, match: m.match, confidence: m.confidence })) }, 'Multiple high-confidence entity matches - applying disambiguation'); // Priority 1: Prefer entities that appear near action words const actionWords = ['show', 'get', 'find', 'list', 'give', 'all']; const nearActionMatches = topMatches.filter(match => { const matchIndex = query.toLowerCase().indexOf(match.match.toLowerCase()); return actionWords.some(action => { const actionIndex = query.toLowerCase().indexOf(action); return actionIndex >= 0 && Math.abs(matchIndex - actionIndex) <= 20; // Within 20 characters }); }); if (nearActionMatches.length === 1) { primary = nearActionMatches[0].entity; getLogger().debug({ selected: primary, reason: 'near action words' }, 'Entity disambiguation result'); } else { // Priority 2: Prefer explicit plural forms over singular incidental matches const explicitMatches = topMatches.filter(match => { const matchWord = match.match.toLowerCase(); return matchWord.endsWith('s') || matchWord === match.entity; // Plural or exact entity name }); if (explicitMatches.length >= 1) { primary = explicitMatches[0].entity; getLogger().debug({ selected: primary, reason: 'explicit plural/entity name' }, 'Entity disambiguation result'); } else { // Fallback: Use highest confidence, but prefer pages over experiments for ambiguous cases const sortedMatches = topMatches.sort((a, b) => { if (a.confidence === b.confidence) { // Break ties by preferring certain entities const priorityOrder = ['pages', 'flags', 'experiments', 'audiences']; const aIndex = priorityOrder.indexOf(a.entity); const bIndex = priorityOrder.indexOf(b.entity); if (aIndex !== -1 && bIndex !== -1) { return aIndex - bIndex; } } return b.confidence - a.confidence; }); primary = sortedMatches[0].entity; getLogger().debug({ selected: primary, reason: 'priority order fallback' }, 'Entity disambiguation result'); } } } else { // Fallback to highest confidence primary = entityMatches[0].entity; } // Related entities are other high-confidence matches // L2-2 FIX: Filter out entities that are actually fields/properties of primary entity const related = entityMatches .slice(1) .filter(m => m.confidence >= 0.7) .map(m => m.entity) .filter(entity => { // Don't treat variations as a separate entity - they're JSON in flags/experiments if (entity === 'variations' && (primary === 'flags' || primary === 'experiments')) { getLogger().debug(`L2-2 FIX: Excluding 'variations' from JOINs - it's a JSON field in ${primary}`); return false; } // Don't join to "environments" directly - use flag_environments or experiment_environments if (entity === 'environments' && (primary === 'flags' || primary === 'experiments')) { getLogger().debug(`L2-2 FIX: Excluding 'environments' from JOINs - will use ${primary}_environments instead`); return false; } // Don't join to rules as a separate entity - they're part of flags if (entity === 'rules' && primary === 'flags') { getLogger().debug(`L2-2 FIX: Excluding 'rules' from JOINs - they're part of flags`); return false; } return true; }); return { primary, related }; } /** * Extract fields from query with proper table qualification using IntelligentFieldMapper */ extractFields(query, primaryEntity) { const fieldMatches = findFields(query, primaryEntity); const fields = fieldMatches.map(m => m.field); // Qualify fields with their correct table names using IntelligentFieldMapper return fields.map(field => this.qualifyFieldName(field, primaryEntity)); } /** * Qualify field name with correct table using IntelligentFieldMapper */ qualifyFieldName(field, primaryEntity) { // If field is already qualified (contains dot), return as-is if (field.includes('.')) { return field; } // Map common field aliases to actual column names const fieldAliasMap = { 'environment': 'environment_key', 'env': 'environment_key', 'flag': 'flag_key', 'experiment': 'experiment_id', 'rule': 'rule_key' }; const mappedField = fieldAliasMap[field.toLowerCase()] || field; try { // CRITICAL FIX: Use smart field resolution algorithm to prevent JOIN explosion const context = { primaryEntity: primaryEntity, requestedField: mappedField, isAggregation: this.isAggregationContext(), isGroupBy: true, // Called from groupBy extraction context queryType: this.determineQueryType() }; try { const resolution = this.fieldResolver.resolve(mappedField); // Log resolution decision for debugging getLogger().info(`Field resolution: '${field}' → '${mappedField}' → '${resolution.viewName}.${resolution.columnName}'`); // Use the resolved field path from the view return `${resolution.viewName}.${resolution.columnName}`; } catch (error) { getLogger().warn(`Field '${mappedField}' not available in views, using fallback`); // Continue with fallback logic below } // Fallback to original logic if confidence too low const mapping = this.fieldMapper.mapField(primaryEntity, mappedField); if (mapping.success) { if (mapping.isJsonPath) { const jsonPath = mapping.jsonPath || '$.value'; return `JSON_EXTRACT(${mapping.table}.${mapping.column}, '${jsonPath}')`; } else { return `${mapping.table}.${mapping.column}`; } } else { // Final fallback: assume it belongs to primary entity table const schema = this.fieldMapper.getEntitySchema(primaryEntity); const tableName = schema?.table || primaryEntity; return `${tableName}.${mappedField}`; } } catch (error) { const errorMessage = error instanceof Error ? error.message : String(error); getLogger().warn(`Field resolution failed for '${field}' (mapped to '${mappedField}'): ${errorMessage}, using fallback`); // Emergency fallback const schema = this.fieldMapper.getEntitySchema(primaryEntity); const tableName = schema?.table || primaryEntity; return `${tableName}.${mappedField}`; } } /** * Helper method to determine if current context is aggregation */ isAggregationContext() { // Check if current parsing context involves aggregation functions // Look for aggregation keywords in the normalized query const aggregationKeywords = ['count', 'sum', 'avg', 'average', 'total', 'group', 'grouped']; const normalizedQuery = this.currentQuery?.toLowerCase() || ''; return aggregationKeywords.some(keyword => normalizedQuery.includes(keyword)); } /** * Helper method to determine query type for field resolution */ determineQueryType() { const normalizedQuery = this.currentQuery?.toLowerCase() || ''; // Check for COUNT patterns if (normalizedQuery.includes('count') || normalizedQuery.includes('total')) { return 'count'; } // Check for other aggregation patterns if (normalizedQuery.includes('sum') || normalizedQuery.includes('avg') || normalizedQuery.includes('group') || normalizedQuery.includes('aggregate')) { return 'aggregate'; } // Check for list patterns if (normalizedQuery.includes('list') || normalizedQuery.includes('show')) { return 'list'; } // Default to count (most restrictive/safest) return 'count'; } /** * Find field in related entities when not found in primary entity */ findFieldInRelatedEntities(field, primaryEntity) { const results = []; // Common field relationships const fieldEntityMap = { 'enabled': ['flag_environments', 'variations', 'rules', 'rulesets'], 'environment_key': ['flag_environments', 'environments'], 'environment': ['flag_environments', 'environments'], // Map "environment" to environment_key 'traffic_allocation': ['experiments', 'groups'], 'status': ['experiments', 'variations'], // Only experiments and variations have status 'archived': ['flags', 'experiments', 'variations'], // Flags use archived, not status 'percentage_included': ['rules', 'flag_environments'], 'audience_conditions': ['rules', 'experiments'], 'variations': ['experiments', 'rules'], 'metrics': ['experiments', 'rules'], 'key': ['flags', 'environments', 'rules'], // Note: experiments use 'id' not 'key' 'id': ['experiments', 'flags', 'audiences', 'events'], // Primary IDs 'results': ['experiment_results'], // Map results to correct table 'conversion': ['experiment_results'], 'confidence': ['experiment_results'], 'unique_conversions': ['experiment_results'], 'confidence_level': ['experiment_results'], 'experiments_id': ['extensions'], // Fix extensions foreign key 'experiment_id': ['experiment_results', 'extensions'], // Alternative naming 'page_targeting': ['experiments'], // JSON field in experiments 'url_conditions': ['experiments'], // JSON field in experiments 'success_metrics': ['rules'], // JSON field in rules 'dynamic_variables': ['variations'], // JSON field in variations 'audience_attributes': ['audiences'] // JSON field in audiences }; const candidateEntities = fieldEntityMap[field] || []; for (const entity of candidateEntities) { try { // Handle field aliases - map "environment" to "environment_key" let actualField = field; let mappingEntity = entity; // Use a separate variable for mapping if (field === 'environment' && (entity === 'flag_environments' || entity === 'environments')) { actualField = 'environment_key'; } // Map audience_ids to audience_conditions (JSON field) if (field === 'audience_ids' && (entity === 'rules' || entity === 'experiments')) { actualField = 'audience_conditions'; } // Map results table to experiment_results if (entity === 'results') { mappingEntity = 'experiment_results'; } const mapping = this.fieldMapper.mapField(mappingEntity, actualField); if (mapping.success) { results.push({ table: mapping.table, column: mapping.column, isJsonPath: mapping.isJsonPath || false, jsonPath: mapping.jsonPath, entity }); } } catch (e) { // Continue to next entity } } return results; } /** * 🚧 Extract filters from query */ extractFilters(query, primaryEntity) { const filters = []; // L7-16 FIX: Extract comparison operators BEFORE field extraction // This prevents "less than", "greater than" etc. from being treated as field names const comparisonPatterns = [ // L7-16 specific pattern: "with less than 1000 total visitors" // Changed to not capture "with" as part of the field { pattern: /(?:with\s+)?less\s+than\s+(\d+)\s+(?:total\s+)?(\w+)/gi, extractor: (match) => ({ field: match[2], operator: '<', value: match[1] }) }, { pattern: /(?:with\s+)?(?:greater|more)\s+than\s+(\d+)\s+(?:total\s+)?(\w+)/gi, extractor: (match) => ({ field: match[2], operator: '>', value: match[1] }) }, // Generic patterns: "field less than value" { pattern: /(\w+(?:\.\w+)*)\s+(?:is\s+)?less\s+than\s+(\d+)/gi, extractor: (match) => ({ field: match[1], operator: '<', value: match[2] }) }, { pattern: /(\w+(?:\.\w+)*)\s+(?:is\s+)?(?:greater|more)\s+than\s+(\d+)/gi, extractor: (match) => ({ field: match[1], operator: '>', value: match[2] }) }, // Patterns with operators { pattern: /(\w+(?:\.\w+)*)\s+(?:is\s+)?<=\s+(\d+)/gi, extractor: (match) => ({ field: match[1], operator: '<=', value: match[2] }) }, { pattern: /(\w+(?:\.\w+)*)\s+(?:is\s+)?>=\s+(\d+)/gi, extractor: (match) => ({ field: match[1], operator: '>=', value: match[2] }) }, { pattern: /(\w+(?:\.\w+)*)\s+(?:is\s+)?<\s+(\d+)/gi, extractor: (match) => ({ field: match[1], operator: '<', value: match[2] }) }, { pattern: /(\w+(?:\.\w+)*)\s+(?:is\s+)?>\s+(\d+)/gi, extractor: (match) => ({ field: match[1], operator: '>', value: match[2] }) } ]; for (const patternDef of comparisonPatterns) { let match; while ((match = patternDef.pattern.exec(query)) !== null) { const result = patternDef.extractor(match); if (result.field && result.value && result.operator) { // Skip common stop words if (['with', 'has', 'have', 'the', 'a', 'an'].includes(result.field.toLowerCase())) { continue; } // DEBUG: L7-16 - Extracted comparison - field: "${result.field}", operator: "${result.operator}", value: "${result.value}" // L2-2 FIX: Skip variations comparisons - they should be handled as JSON array length if (result.field === 'variations' && (primaryEntity === 'flags' || primaryEntity === 'experiments')) { // L2-2 FIX: Skipping direct variations comparison - will be handled as JSON_ARRAY_LENGTH continue; } // L6-10 FIX: Detect entity count aggregation patterns // Pattern: "Which X have more than N Y?" should be COUNT(Y) grouped by X, not Y > N const entityCountPattern = /(?:which|what)\s+(\w+)\s+(?:have|has)\s+(?:more|greater)\s+than\s+(\d+)\s+(\w+)/i; const entityCountMatch = query.match(entityCountPattern); if (entityCountMatch) { const primaryEntityFromQuery = entityCountMatch[1]; // "environments" const threshold = entityCountMatch[2]; // "100" const countedEntity = entityCountMatch[3]; // "flags" // L6-10 FIX: Detected entity count pattern - primaryEntity: "${primaryEntityFromQuery}", threshold: "${threshold}", countedEntity: "${countedEntity}" // This is a COUNT aggregation, not a field comparison // Don't add as a filter - this will be handled by aggregation logic // L6-10 FIX: Skipping field comparison because this is an entity count aggregation continue; } // L7-16 FIX: Map "visitors" to the correct JSON field for experiments let fieldName = result.field; if (primaryEntity === 'experiments' && fieldName === 'visitors') { fieldName = 'visitor_count'; // This will be converted to JSON_EXTRACT later } filters.push({ field: getCanonicalFieldName(fieldName, primaryEntity), operator: result.operator, value: result.value, confidence: 0.95 }); } } } // CRITICAL FIX: Add filter for A/B test rules // In Feature Experimentation, A/B tests are rules with type='a/b' if (/a\/b\s+test\s+rules?|ab\s+test\s+rules?/i.test(query)) { filters.push({ field: 'type', operator: '=', value: 'a/b', confidence: 1.0 }); getLogger().debug('Added type="a/b" filter for A/B test rules query'); } // L7-16 FIX: Create a modified query that removes comparison operator phrases // This prevents "less", "greater", etc. from being picked up as field names let modifiedQuery = query; const comparisonPhrases = [ /\bless\s+than\s+\d+/gi, /\bgreater\s+than\s+\d+/gi, /\bmore\s+than\s+\d+/gi, /\bwith\s+less\s+than\s+\d+/gi, /\bwith\s+greater\s+than\s+\d+/gi, /\bwith\s+more\s+than\s+\d+/gi ]; // Replace comparison phrases with placeholders to prevent field extraction for (const phrase of comparisonPhrases) { modifiedQuery = modifiedQuery.replace(phrase, ' [COMPARISON_OPERATOR] '); } // ENHANCED: Use sophisticated field pattern matching instead of primitive regex const fieldMatches = findFields(modifiedQuery, primaryEntity); // CRITICAL DEBUG: Log if we're finding AB as a field const abMatch = fieldMatches.find(f => f.field === 'AB' || f.field.includes('AB')); if (abMatch) { getLogger().warn({ query, abMatch, message: 'FOUND AB FIELD - THIS IS THE BUG!' }); } getLogger().debug({ query, primaryEntity, fieldMatches: fieldMatches.map(f => ({ field: f.field, confidence: f.confidence, match: f.match })) }, 'Field matches found for filter extraction'); // L7-18 FIX: Handle quantity modifiers like "multiple", "several", "many" const quantityModifiers = [ { pattern: /with\s+(multiple|several|many)\s+(\w+)/gi, minCount: 2 }, { pattern: /has\s+(multiple|several|many)\s+(\w+)/gi, minCount: 2 }, { pattern: /having\s+(multiple|several|many)\s+(\w+)/gi, minCount: 2 } ]; for (const modifierPattern of quantityModifiers) { let match; while ((match = modifierPattern.pattern.exec(query)) !== null) { const quantityWord = match[1]; const entityType = match[2]; // L7-18 FIX: Found quantity modifier "${quantityWord}" for "${entityType}" // Create a COUNT condition instead of treating as field const canonicalField = getCanonicalFieldName(entityType, primaryEntity); filters.push({ field: `COUNT(${canonicalField})`, operator: '>', value: String(modifierPattern.minCount), confidence: 0.9 }); } } // Apply domain-specific knowledge to convert field matches into filters for (const fieldMatch of fieldMatches) { // Skip if this was already handled as a quantity modifier if (/multiple|several|many/i.test(fieldMatch.match) && /with\s+(multiple|several|many)|has\s+(multiple|several|many)/i.test(query)) { continue; } const domainFilter = this.createDomainSpecificFilter(query, fieldMatch, primaryEntity); if (domainFilter) { filters.push(domainFilter); } } // Legacy patterns (kept for backward compatibility) // Pattern for "where X = Y" style filters const wherePattern = /where\s+(\w+(?:\.\w+)*)\s*(=|!=|>|<|>=|<=)\s*(['""]?)([^'""]*)\3/gi; let match; while ((match = wherePattern.exec(query)) !== null) { filters.push({ field: getCanonicalFieldName(match[1], primaryEntity), operator: match[2], value: match[4], confidence: 0.9 }); } // Pattern for "with X" or "has X" - but more restrictive to avoid false positives // L7-16 FIX: Don't match "with" when followed by comparison operators const withPattern = /(?:with|has|having)\s+(\w+)(?:\s+(?:=|of|is)\s*(['""]?)([^'""]*)\2)?/gi; while ((match = withPattern.exec(query)) !== null) { const capturedWord = match[1].toLowerCase(); // L7-16 FIX: Skip if "with" is followed by comparison terms const afterWord = query.substring(match.index + match[0].length).trim(); if (/^(less|greater|more|fewer)\s+than/i.test(afterWord)) { continue; // This is part of a comparison, not a field } // CRITICAL FIX: Skip if the captured word is part of "A/B" or similar patterns const matchIndex = match.index + match[0].indexOf(match[1]); const afterMatch = query.substring(matchIndex + match[1].length, matchIndex + match[1].length + 2); if (afterMatch.startsWith('/')) { // This is part of a compound term like "A/B", skip it continue; } // CRITICAL FIX: Skip stop words (pronouns like "their", "this", etc.) const STOP_WORDS = new Set([ 'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'from', 'as', 'is', 'was', 'are', 'been', 'their', 'them', 'they', 'this', 'that', 'these', 'those', 'my', 'your', 'his', 'her', 'its', 'our', 'their', 'what', 'which', 'who', 'whom', 'whose', 'where', 'when', 'why', 'how', 'all', 'each', 'every', 'some', 'few', 'more', 'most', 'other', 'such', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 'just', 'being', 'less', 'greater', // L7-16: Added comparison words 'multiple', 'several', 'many' // L7-18: Added quantity modifiers ]); if (STOP_WORDS.has(capturedWord)) { continue; // Skip stop words } const field = getCanonicalFieldName(match[1], primaryEntity); if (match[3]) { filters.push({ field, operator: '=', value: match[3], confidence: 0.8 }); } else { filters.push({ field, operator: 'IS NOT NULL', value: null, confidence: 0.7 }); } } // Pattern for "NOT enabled" or "have NOT been enabled" - Check this FIRST if (/(?:not|haven't|have not)\s+(?:been\s+)?enabled/i.test(query)) { filters.push({ field: 'enabled', operator: '=', value: false, confidence: 0.9, isJsonPath: true }); } // Pattern for "enabled" status - but NOT when preceded by "not" else if (/\benabled\b/i.test(query) && !filters.some(f => f.field.includes('enabled'))) { // Make sure "enabled" is not preceded by "not", "haven't", etc. if (!/(?:not|haven't|have not)\s+(?:been\s+)?enabled/i.test(query)) { filters.push({ field: this.qualifyFieldName('enabled', primaryEntity), operator: '=', value: true, confidence: 0.8 }); } } // Pattern for "dis