UNPKG

@simonecoelhosfo/optimizely-mcp-server

Version:

Optimizely MCP Server for AI assistants with integrated CLI tools

454 lines 18.7 kB
/**
 * Advanced SQL Query Parser - Phase 3.2 Task 3.2.2
 *
 * This parser enhances the basic SQL parsing capabilities in QueryAnalysisEngine
 * with advanced features needed for complex analytics queries:
 *
 * 1. Subquery parsing and nested field extraction
 * 2. CTE (Common Table Expression) support
 * 3. Window function analysis
 * 4. Complex JOIN pattern detection
 * 5. Advanced aggregation function analysis
 * 6. Dynamic field reference resolution
 *
 * The parser is designed to handle the complex SQL queries that the Dynamic
 * Analytics Query Engine will generate when flattening nested JSON structures.
 *
 * NOTE: everything here is heuristic (regex plus parenthesis matching), not a
 * real SQL grammar. SQL comments and dialect-specific quoting are not handled.
 */
import { getLogger } from '../logging/Logger.js';

const logger = getLogger();

/**
 * Upper-cased tokens that must never be reported as field references.
 * Qualified names such as `t.end` are unaffected because the whole token
 * (`T.END`) is compared against this set.
 */
const SQL_KEYWORDS = new Set([
    'SELECT', 'FROM', 'WHERE', 'GROUP', 'BY', 'ORDER', 'HAVING',
    'ON', 'AND', 'OR', 'NOT', 'AS', 'IN', 'IS', 'NULL', 'TRUE', 'FALSE',
    'LIKE', 'BETWEEN', 'EXISTS', 'JOIN', 'INNER', 'LEFT', 'RIGHT', 'FULL',
    'OUTER', 'CROSS', 'LIMIT', 'OFFSET', 'ASC', 'DESC', 'DISTINCT', 'UNION',
    'ALL', 'CASE', 'WHEN', 'THEN', 'ELSE', 'END', 'WITH', 'OVER', 'PARTITION',
    'INTO', 'VALUES', 'SET',
]);

/** Matches integer and decimal literals, which are not field references. */
const NUMERIC_LITERAL = /^\d+(?:\.\d+)?$/;

/**
 * Find the index of the `)` that closes the `(` located at `openIndex`.
 * Parentheses inside single- or double-quoted runs are ignored.
 *
 * @param {string} text - Text to scan.
 * @param {number} openIndex - Index of the opening parenthesis.
 * @returns {number} Index of the matching `)`, or -1 when unbalanced.
 */
function findMatchingParen(text, openIndex) {
    let depth = 0;
    let quote = null;
    for (let i = openIndex; i < text.length; i += 1) {
        const ch = text[i];
        if (quote !== null) {
            // A doubled quote ('') closes and immediately reopens, which is fine.
            if (ch === quote) {
                quote = null;
            }
            continue;
        }
        if (ch === "'" || ch === '"') {
            quote = ch;
        } else if (ch === '(') {
            depth += 1;
        } else if (ch === ')') {
            depth -= 1;
            if (depth === 0) {
                return i;
            }
        }
    }
    return -1;
}

/**
 * Split a comma-separated list, ignoring commas nested inside parentheses
 * or quoted runs. Each part is trimmed; empty parts are preserved.
 *
 * @param {string} text - List text, e.g. `a, COALESCE(b, 0)`.
 * @returns {string[]} Trimmed top-level parts.
 */
function splitTopLevelCommas(text) {
    const parts = [];
    let depth = 0;
    let quote = null;
    let start = 0;
    for (let i = 0; i < text.length; i += 1) {
        const ch = text[i];
        if (quote !== null) {
            if (ch === quote) {
                quote = null;
            }
            continue;
        }
        if (ch === "'" || ch === '"') {
            quote = ch;
        } else if (ch === '(') {
            depth += 1;
        } else if (ch === ')') {
            depth = Math.max(0, depth - 1);
        } else if (ch === ',' && depth === 0) {
            parts.push(text.slice(start, i).trim());
            start = i + 1;
        }
    }
    parts.push(text.slice(start).trim());
    return parts;
}

/**
 * Advanced SQL parser with sophisticated analysis capabilities
 */
export class AdvancedSQLParser {
    version = '1.0.0';

    constructor() {
        logger.info('AdvancedSQLParser initialized');
    }

    /**
     * Parse a SQL query with advanced analysis.
     *
     * @param {string} query - SQL text to analyse.
     * @param {*} primaryEntity - Passed through unchanged onto the result.
     * @returns {object} The basic parse result extended with `ctes`,
     *   `windowFunctions`, `aggregates`, `dynamicReferences`,
     *   `complexityMetrics` and `optimizationOpportunities`.
     * @throws {Error} Wraps any failure; the original error is on `cause`.
     */
    parseQuery(query, primaryEntity) {
        const startTime = performance.now();
        try {
            if (typeof query !== 'string') {
                throw new TypeError('query must be a string');
            }
            logger.debug('Starting advanced SQL parsing');
            // Step 1: Basic query parsing (reuse from QueryAnalysisEngine logic)
            const basicParsed = this.performBasicParsing(query, primaryEntity);
            // Step 2: Extract CTEs
            const ctes = this.extractCTEs(query);
            // Step 3: Analyze window functions
            const windowFunctions = this.extractWindowFunctions(query);
            // Step 4: Analyze aggregate functions
            const aggregates = this.extractAggregates(query);
            // Step 5: Identify dynamic field references
            const dynamicReferences = this.extractDynamicReferences(query);
            // Step 6: Calculate complexity metrics
            const complexityMetrics = this.calculateComplexityMetrics(basicParsed, ctes, windowFunctions, aggregates, dynamicReferences);
            // Step 7: Identify optimization opportunities
            const optimizationOpportunities = this.identifyOptimizationOpportunities(basicParsed, complexityMetrics, dynamicReferences);
            const parseTime = performance.now() - startTime;
            const result = {
                ...basicParsed,
                ctes,
                windowFunctions,
                aggregates,
                dynamicReferences,
                complexityMetrics,
                optimizationOpportunities,
            };
            logger.info(`Advanced SQL parsing completed in ${parseTime}ms`);
            return result;
        } catch (error) {
            logger.error('Advanced SQL parsing failed');
            const reason = error instanceof Error ? error.message : String(error);
            // Keep the original error reachable for callers that need the stack.
            throw new Error(`Advanced SQL parsing failed: ${reason}`, { cause: error });
        }
    }

    /**
     * Perform basic SQL parsing (similar to QueryAnalysisEngine).
     *
     * @param {string} query - SQL text.
     * @param {*} primaryEntity - Stored as-is on the result.
     * @returns {object} Operation type plus the extracted field lists, joins
     *   and (recursively parsed) subqueries.
     */
    performBasicParsing(query, primaryEntity) {
        const normalizedQuery = query.trim().toUpperCase();
        // Anything that is not INSERT/UPDATE/DELETE (including WITH ...) is SELECT.
        let operation = 'SELECT';
        if (normalizedQuery.startsWith('INSERT')) {
            operation = 'INSERT';
        } else if (normalizedQuery.startsWith('UPDATE')) {
            operation = 'UPDATE';
        } else if (normalizedQuery.startsWith('DELETE')) {
            operation = 'DELETE';
        }
        const referencedFields = this.extractBasicFieldReferences(query);
        return {
            originalQuery: query,
            operation,
            primaryEntity,
            referencedFields,
            filterFields: this.extractFilterFields(query),
            projectionFields: this.extractProjectionFields(query),
            groupByFields: this.extractGroupByFields(query),
            orderByFields: this.extractOrderByFields(query),
            joins: this.extractJoins(query),
            subqueries: this.extractSubqueries(query),
        };
    }

    /**
     * Extract Common Table Expressions from query.
     *
     * Starts at the first `WITH` keyword and reads `name [(cols)] AS (...)`
     * entries separated by commas. Bodies are delimited by parenthesis
     * matching, so nested parentheses and multi-line bodies are supported.
     *
     * @param {string} query - SQL text.
     * @returns {Array<{name: string, columns: (string[]|undefined), query: string,
     *   referencedFields: string[], isRecursive: boolean}>} One entry per CTE.
     */
    extractCTEs(query) {
        const ctes = [];
        const withMatch = /\bWITH\s+(RECURSIVE\s+)?/i.exec(query);
        if (!withMatch) {
            return ctes;
        }
        const isRecursive = Boolean(withMatch[1]);
        // Sticky patterns: they must match exactly at the cursor position.
        const headerPattern = /\s*(\w+)(?:\s*\(([^)]+)\))?\s+AS\s*(?:(?:NOT\s+)?MATERIALIZED\s*)?\(/iy;
        const separatorPattern = /\s*,/y;
        let cursor = withMatch.index + withMatch[0].length;
        while (cursor < query.length) {
            headerPattern.lastIndex = cursor;
            const header = headerPattern.exec(query);
            if (!header) {
                break;
            }
            const openIndex = headerPattern.lastIndex - 1;
            const closeIndex = findMatchingParen(query, openIndex);
            if (closeIndex === -1) {
                break; // unbalanced body: stop rather than guess
            }
            const cteQuery = query.slice(openIndex + 1, closeIndex);
            const columnSpec = header[2];
            ctes.push({
                name: header[1],
                columns: columnSpec ? columnSpec.split(',').map((col) => col.trim()) : undefined,
                query: cteQuery,
                referencedFields: this.extractBasicFieldReferences(cteQuery),
                isRecursive,
            });
            // Another CTE follows only when a comma comes right after the body.
            separatorPattern.lastIndex = closeIndex + 1;
            if (!separatorPattern.test(query)) {
                break;
            }
            cursor = separatorPattern.lastIndex;
        }
        return ctes;
    }

    /**
     * Extract window functions (`fn(...) OVER (...)`) from query.
     *
     * @param {string} query - SQL text.
     * @returns {Array<{functionName: string, partitionBy: string[], orderBy: string[],
     *   frameSpec: (object|undefined), context: string}>} One entry per window call.
     */
    extractWindowFunctions(query) {
        const windowFunctions = [];
        const callPattern = /\b(ROW_NUMBER|RANK|DENSE_RANK|PERCENT_RANK|CUME_DIST|NTILE|LAG|LEAD|FIRST_VALUE|LAST_VALUE|NTH_VALUE|COUNT|SUM|AVG|MIN|MAX)\s*\(/gi;
        const overPattern = /\s*OVER\s*\(/iy;
        let call;
        while ((call = callPattern.exec(query)) !== null) {
            const argsClose = findMatchingParen(query, callPattern.lastIndex - 1);
            if (argsClose === -1) {
                continue;
            }
            // Only calls immediately followed by OVER ( ... ) are window functions.
            overPattern.lastIndex = argsClose + 1;
            if (!overPattern.test(query)) {
                continue;
            }
            const specOpen = overPattern.lastIndex - 1;
            const specClose = findMatchingParen(query, specOpen);
            if (specClose === -1) {
                continue;
            }
            const windowSpec = query.slice(specOpen + 1, specClose);
            // Each clause runs until the next clause keyword or the end of the spec.
            const partitionMatch = /\bPARTITION\s+BY\s+([\s\S]+?)(?=\s+ORDER\s+BY\b|\s+(?:ROWS|RANGE|GROUPS)\b|\s*$)/i.exec(windowSpec);
            const orderMatch = /\bORDER\s+BY\s+([\s\S]+?)(?=\s+(?:ROWS|RANGE|GROUPS)\b|\s*$)/i.exec(windowSpec);
            const frameMatch = /\b(ROWS|RANGE|GROUPS)\s+([\s\S]+)/i.exec(windowSpec);
            const toList = (clause) => splitTopLevelCommas(clause).filter((item) => item.length > 0);
            windowFunctions.push({
                functionName: call[1],
                partitionBy: partitionMatch ? toList(partitionMatch[1]) : [],
                orderBy: orderMatch ? toList(orderMatch[1]) : [],
                frameSpec: frameMatch ? this.parseWindowFrame(frameMatch[1], frameMatch[2]) : undefined,
                context: 'SELECT', // Simplified - would need context analysis
            });
        }
        return windowFunctions;
    }

    /**
     * Parse window frame specification.
     *
     * @param {string} type - Frame unit keyword (ROWS, RANGE or GROUPS).
     * @param {string} spec - Text following the unit keyword.
     * @returns {{type: string, start: object, end: (object|undefined)}} Frame
     *   description; `start` defaults to UNBOUNDED PRECEDING when no boundary
     *   is recognised, `end` is undefined when only one boundary is present.
     */
    parseWindowFrame(type, spec) {
        const frameType = type.toUpperCase();
        const boundaryPattern = /(UNBOUNDED\s+PRECEDING|CURRENT\s+ROW|UNBOUNDED\s+FOLLOWING|\d+\s+PRECEDING|\d+\s+FOLLOWING)/gi;
        const boundaries = spec.match(boundaryPattern) ?? [];
        const start = this.parseFrameBoundary(boundaries[0] || 'UNBOUNDED PRECEDING');
        const end = boundaries[1] ? this.parseFrameBoundary(boundaries[1]) : undefined;
        return { type: frameType, start, end };
    }

    /**
     * Parse individual frame boundary.
     *
     * @param {string} boundary - e.g. `UNBOUNDED PRECEDING`, `3 FOLLOWING`.
     * @returns {{type: string, offset?: number}} Boundary descriptor; falls
     *   back to CURRENT_ROW for unrecognised input.
     */
    parseFrameBoundary(boundary) {
        // Collapse whitespace runs so "UNBOUNDED   PRECEDING" compares equal.
        const normalizedBoundary = boundary.toUpperCase().trim().replace(/\s+/g, ' ');
        if (normalizedBoundary === 'UNBOUNDED PRECEDING') {
            return { type: 'UNBOUNDED_PRECEDING' };
        }
        if (normalizedBoundary === 'CURRENT ROW') {
            return { type: 'CURRENT_ROW' };
        }
        if (normalizedBoundary === 'UNBOUNDED FOLLOWING') {
            return { type: 'UNBOUNDED_FOLLOWING' };
        }
        const offset = Number.parseInt(normalizedBoundary.match(/\d+/)?.[0] || '0', 10);
        if (normalizedBoundary.includes('PRECEDING')) {
            return { type: 'PRECEDING', offset };
        }
        if (normalizedBoundary.includes('FOLLOWING')) {
            return { type: 'FOLLOWING', offset };
        }
        return { type: 'CURRENT_ROW' };
    }

    /**
     * Extract aggregate functions from query.
     *
     * Arguments are delimited by parenthesis matching, so nested calls such
     * as `SUM(COALESCE(x, 0))` keep their full argument text.
     *
     * @param {string} query - SQL text.
     * @returns {Array<{functionName: string, arguments: string[], distinct: boolean,
     *   filter: (string|undefined), referencedFields: string[]}>} One entry per call.
     */
    extractAggregates(query) {
        const aggregates = [];
        const callPattern = /\b(COUNT|SUM|AVG|MIN|MAX|ARRAY_AGG|STRING_AGG|LISTAGG)\s*\(/gi;
        const filterPattern = /\s+FILTER\s*\(/iy;
        let call;
        while ((call = callPattern.exec(query)) !== null) {
            const openIndex = callPattern.lastIndex - 1;
            const closeIndex = findMatchingParen(query, openIndex);
            if (closeIndex === -1) {
                continue;
            }
            const inner = query.slice(openIndex + 1, closeIndex).trim();
            if (inner.length === 0) {
                continue; // e.g. COUNT() - nothing to analyse
            }
            const distinctMatch = /^DISTINCT\s+/i.exec(inner);
            const args = distinctMatch ? inner.slice(distinctMatch[0].length).trim() : inner;
            // Optional FILTER (WHERE ...) directly after the call.
            let filter;
            filterPattern.lastIndex = closeIndex + 1;
            if (filterPattern.test(query)) {
                const filterOpen = filterPattern.lastIndex - 1;
                const filterClose = findMatchingParen(query, filterOpen);
                if (filterClose !== -1) {
                    const whereMatch = /^\s*WHERE\s+([\s\S]+?)\s*$/i.exec(query.slice(filterOpen + 1, filterClose));
                    filter = whereMatch ? whereMatch[1] : undefined;
                }
            }
            aggregates.push({
                functionName: call[1],
                arguments: splitTopLevelCommas(args),
                distinct: Boolean(distinctMatch),
                filter,
                referencedFields: this.extractFieldsFromExpression(args),
            });
        }
        return aggregates;
    }

    /**
     * Extract dynamic field references that need runtime resolution:
     * JSON_EXTRACT calls, `name[index].field` accesses and CASE expressions.
     *
     * @param {string} query - SQL text.
     * @returns {object[]} Entries with `expression`, `type`, `baseEntity`,
     *   `pathComponents` and, for CASE expressions, `conditions` (one item
     *   per WHEN branch; `falseField` carries the ELSE value when present).
     */
    extractDynamicReferences(query) {
        const dynamicRefs = [];
        // JSON_EXTRACT patterns
        const jsonExtractPattern = /JSON_EXTRACT\s*\(\s*([^,]+),\s*['"]\$\.([^'"]+)['"]\s*\)/gi;
        for (const match of query.matchAll(jsonExtractPattern)) {
            dynamicRefs.push({
                expression: match[0],
                type: 'JSON_EXTRACT',
                baseEntity: match[1].trim(),
                pathComponents: match[2].split(/[.[\]]+/).filter((comp) => comp.length > 0),
            });
        }
        // Array element access patterns
        const arrayElementPattern = /(\w+)\s*\[\s*(\d+|\*)\s*\]\.(\w+)/gi;
        for (const match of query.matchAll(arrayElementPattern)) {
            dynamicRefs.push({
                expression: match[0],
                type: 'ARRAY_ELEMENT',
                baseEntity: match[1],
                pathComponents: [match[2], match[3]],
            });
        }
        // CASE expressions (conditional fields). Nested CASE is not supported:
        // the body ends at the first END keyword.
        const casePattern = /\bCASE\s+([\s\S]*?)\s+END\b/gi;
        for (const match of query.matchAll(casePattern)) {
            const body = match[1];
            const branchPattern = /\bWHEN\s+([\s\S]+?)\s+THEN\s+([\s\S]+?)(?=\s+WHEN\b|\s+ELSE\b|\s*$)/gi;
            const branches = [...body.matchAll(branchPattern)];
            if (branches.length === 0) {
                continue;
            }
            const elseMatch = /\bELSE\s+([\s\S]+?)\s*$/i.exec(body);
            const falseField = elseMatch ? elseMatch[1] : undefined;
            dynamicRefs.push({
                expression: match[0],
                type: 'CONDITIONAL_FIELD',
                baseEntity: 'unknown',
                pathComponents: [],
                conditions: branches.map((branch) => ({
                    condition: branch[1],
                    trueField: branch[2],
                    falseField,
                })),
            });
        }
        return dynamicRefs;
    }

    /**
     * Calculate comprehensive query complexity metrics.
     *
     * @param {object} basicParsed - Result of performBasicParsing.
     * @param {object[]} ctes - Result of extractCTEs.
     * @param {object[]} windowFunctions - Result of extractWindowFunctions.
     * @param {object[]} aggregates - Result of extractAggregates.
     * @param {object[]} dynamicReferences - Result of extractDynamicReferences.
     * @returns {object} Counts, an estimated cartesian size and a weighted
     *   `complexityScore` capped at 100.
     */
    calculateComplexityMetrics(basicParsed, ctes, windowFunctions, aggregates, dynamicReferences) {
        // The qualifier of every `a.b` reference is treated as a table name.
        const tableNames = new Set();
        for (const field of basicParsed.referencedFields) {
            if (field.includes('.')) {
                tableNames.add(field.split('.')[0]);
            }
        }
        const tableCount = Math.max(1, tableNames.size);
        const joinCount = basicParsed.joins.length;
        const maxSubqueryDepth = this.calculateSubqueryDepth(basicParsed.subqueries);
        const aggregateCount = aggregates.length;
        const windowFunctionCount = windowFunctions.length;
        const dynamicFieldCount = dynamicReferences.length;
        // Estimate cartesian product size
        const baseSize = 1000; // Assumed average table size
        const estimatedCartesianSize = baseSize ** tableCount;
        // Weighted sum of every contributor, capped at 100.
        const rawScore = tableCount * 5
            + joinCount * 10
            + maxSubqueryDepth * 15
            + aggregateCount * 8
            + windowFunctionCount * 12
            + dynamicFieldCount * 20
            + ctes.length * 10;
        const complexityScore = Math.min(100, rawScore);
        return {
            tableCount,
            joinCount,
            maxSubqueryDepth,
            aggregateCount,
            windowFunctionCount,
            dynamicFieldCount,
            estimatedCartesianSize,
            complexityScore,
        };
    }

    /**
     * Identify optimization opportunities.
     *
     * @param {object} basicParsed - Result of performBasicParsing.
     * @param {object} complexityMetrics - Result of calculateComplexityMetrics.
     * @param {object[]} dynamicReferences - Result of extractDynamicReferences.
     * @returns {object[]} Suggestions with `type`, `impact`, `description`,
     *   `suggestion`, `estimatedImprovement` and `affectedElements`.
     */
    identifyOptimizationOpportunities(basicParsed, complexityMetrics, dynamicReferences) {
        const opportunities = [];
        // Check for excessive JOINs
        if (complexityMetrics.joinCount > 5) {
            opportunities.push({
                type: 'QUERY_REWRITE',
                impact: 'HIGH',
                description: 'Query has excessive JOIN operations that may impact performance',
                suggestion: 'Consider denormalizing frequently accessed data or using materialized views',
                estimatedImprovement: '40-60% reduction in execution time',
                affectedElements: basicParsed.joins.map((join) => join.joinedEntity),
            });
        }
        // Check for cartesian product risk
        if (complexityMetrics.estimatedCartesianSize > 1000000) {
            opportunities.push({
                type: 'QUERY_REWRITE',
                impact: 'CRITICAL',
                description: 'Query may produce extremely large cartesian product',
                suggestion: 'Add more specific WHERE conditions or restructure joins',
                estimatedImprovement: '90% reduction in execution time and resource usage',
                affectedElements: ['JOIN conditions', 'WHERE clause'],
            });
        }
        // Check for missing indexes on frequently filtered fields
        if (basicParsed.filterFields.length > 0) {
            opportunities.push({
                type: 'INDEX_RECOMMENDATION',
                impact: 'MEDIUM',
                description: 'Filtered fields may benefit from database indexes',
                suggestion: 'Consider adding indexes on frequently filtered columns',
                estimatedImprovement: '20-40% improvement in query speed',
                affectedElements: basicParsed.filterFields,
            });
        }
        // Check for caching opportunities
        if (complexityMetrics.complexityScore > 60 && dynamicReferences.length === 0) {
            opportunities.push({
                type: 'CACHING_OPPORTUNITY',
                impact: 'MEDIUM',
                description: 'Complex query with static results could benefit from caching',
                suggestion: 'Implement query result caching with appropriate TTL',
                estimatedImprovement: '80% improvement for repeated queries',
                affectedElements: ['Query results'],
            });
        }
        return opportunities;
    }

    // Utility methods (simplified implementations)

    /**
     * Collect identifier-like tokens from SQL text.
     *
     * A token qualifies when it is a qualified name (`a.b`) or a bare word
     * followed by whitespace, a comma or the end of the text. SQL keywords
     * and numeric literals are discarded.
     *
     * @param {string} query - SQL text or fragment.
     * @returns {string[]} Unique tokens in first-seen order.
     */
    extractBasicFieldReferences(query) {
        // `$` in the lookahead keeps the final token of a fragment.
        const fieldPattern = /\b\w+\.\w+|\b\w+(?=\s*(?:[,\s]|$))/g;
        const matches = query.match(fieldPattern) ?? [];
        const fields = matches.filter((token) => !SQL_KEYWORDS.has(token.toUpperCase()) && !NUMERIC_LITERAL.test(token));
        return [...new Set(fields)];
    }

    /**
     * Fields referenced by the first WHERE clause.
     *
     * @param {string} query - SQL text.
     * @returns {string[]} Field tokens, or [] when there is no WHERE clause.
     */
    extractFilterFields(query) {
        // The clause ends at the next clause keyword or at the end of the query.
        const whereMatch = query.match(/\bWHERE\s+([\s\S]*?)(?=\s+(?:GROUP\s+BY|ORDER\s+BY|HAVING|LIMIT)\b|\s*;?\s*$)/i);
        if (whereMatch) {
            return this.extractBasicFieldReferences(`WHERE ${whereMatch[1]}`);
        }
        return [];
    }

    /**
     * Fields referenced between the first SELECT and the following FROM.
     *
     * @param {string} query - SQL text.
     * @returns {string[]} Field tokens, or [] when no SELECT ... FROM is found.
     */
    extractProjectionFields(query) {
        const selectMatch = query.match(/\bSELECT\s+([\s\S]*?)\s+FROM\b/i);
        if (selectMatch) {
            return this.extractBasicFieldReferences(`SELECT ${selectMatch[1]}`);
        }
        return [];
    }

    /**
     * Fields referenced by the first GROUP BY clause.
     *
     * @param {string} query - SQL text.
     * @returns {string[]} Field tokens, or [] when there is no GROUP BY.
     */
    extractGroupByFields(query) {
        const groupByMatch = query.match(/\bGROUP\s+BY\s+([\s\S]*?)(?=\s+(?:ORDER\s+BY|HAVING|LIMIT)\b|\s*;?\s*$)/i);
        if (groupByMatch) {
            return this.extractBasicFieldReferences(`GROUP BY ${groupByMatch[1]}`);
        }
        return [];
    }

    /**
     * Fields referenced by the first ORDER BY clause.
     *
     * @param {string} query - SQL text.
     * @returns {string[]} Field tokens, or [] when there is no ORDER BY.
     */
    extractOrderByFields(query) {
        const orderByMatch = query.match(/\bORDER\s+BY\s+([\s\S]*?)(?=\s+(?:LIMIT|OFFSET)\b|\s*;?\s*$)/i);
        if (orderByMatch) {
            return this.extractBasicFieldReferences(`ORDER BY ${orderByMatch[1]}`);
        }
        return [];
    }

    /**
     * Extract `JOIN <table> [alias] ON <condition>` clauses.
     *
     * @param {string} query - SQL text.
     * @returns {Array<{type: string, joinedEntity: string, joinFields: string[]}>}
     *   One entry per join; `type` defaults to INNER for a bare JOIN.
     */
    extractJoins(query) {
        const joins = [];
        // Accepts an optional OUTER keyword and an optional table alias; the
        // ON condition ends at the next join/clause keyword or the query end.
        const joinPattern = /\b(?:(INNER|LEFT|RIGHT|FULL)(?:\s+OUTER)?\s+)?JOIN\s+([\w.]+)(?:\s+(?:AS\s+)?(?!ON\b)\w+)?\s+ON\s+([\s\S]*?)(?=\s+(?:INNER|LEFT|RIGHT|FULL|CROSS|NATURAL|JOIN|WHERE|GROUP|ORDER|HAVING|LIMIT)\b|\s*;?\s*$)/gi;
        for (const match of query.matchAll(joinPattern)) {
            joins.push({
                type: (match[1] || 'INNER').toUpperCase(),
                joinedEntity: match[2],
                joinFields: this.extractBasicFieldReferences(`ON ${match[3]}`),
            });
        }
        return joins;
    }

    /**
     * Extract parenthesised SELECT subqueries and parse each one.
     *
     * Only top-level subqueries of `query` are returned here; nested ones
     * appear on the `subqueries` property of their parent because each
     * subquery is parsed recursively via performBasicParsing.
     *
     * @param {string} query - SQL text.
     * @returns {object[]} Basic parse results, one per subquery.
     */
    extractSubqueries(query) {
        const subqueries = [];
        const openerPattern = /\(\s*SELECT\s/gi;
        let opener;
        while ((opener = openerPattern.exec(query)) !== null) {
            const closeIndex = findMatchingParen(query, opener.index);
            if (closeIndex === -1) {
                continue; // unbalanced: try the next opener
            }
            const cleanSubquery = query.slice(opener.index + 1, closeIndex);
            try {
                subqueries.push(this.performBasicParsing(cleanSubquery, 'unknown'));
            } catch (error) {
                // Best effort: a malformed subquery must not fail the whole parse.
                logger.debug('Skipping malformed subquery');
            }
            // Resume after this subquery; its inner ones were handled recursively.
            openerPattern.lastIndex = closeIndex + 1;
        }
        return subqueries;
    }

    /**
     * Extract field tokens from an arbitrary expression.
     *
     * @param {string} expression - SQL expression text.
     * @returns {string[]} Same result as extractBasicFieldReferences.
     */
    extractFieldsFromExpression(expression) {
        return this.extractBasicFieldReferences(expression);
    }

    /**
     * Compute the maximum nesting depth of a subquery tree.
     *
     * @param {object[]} subqueries - Parsed subqueries, each with its own
     *   `subqueries` array.
     * @returns {number} 0 for no subqueries, otherwise 1 + deepest child depth.
     */
    calculateSubqueryDepth(subqueries) {
        return subqueries.reduce((deepest, subquery) => Math.max(deepest, 1 + this.calculateSubqueryDepth(subquery.subqueries)), 0);
    }
}
//# sourceMappingURL=AdvancedSQLParser.js.map