@simonecoelhosfo/optimizely-mcp-server
Version:
Optimizely MCP Server for AI assistants with integrated CLI tools
454 lines • 18.7 kB
JavaScript
/**
* Advanced SQL Query Parser - Phase 3.2 Task 3.2.2
*
* This parser enhances the basic SQL parsing capabilities in QueryAnalysisEngine
* with advanced features needed for complex analytics queries:
*
* 1. Subquery parsing and nested field extraction
* 2. CTE (Common Table Expression) support
* 3. Window function analysis
* 4. Complex JOIN pattern detection
* 5. Advanced aggregation function analysis
* 6. Dynamic field reference resolution
*
* The parser is designed to handle the complex SQL queries that the Dynamic
* Analytics Query Engine will generate when flattening nested JSON structures.
*/
import { getLogger } from '../logging/Logger.js';
// Module-level logger obtained once at load time; the AdvancedSQLParser class below logs through it.
const logger = getLogger();
/**
 * Advanced SQL parser with sophisticated analysis capabilities.
 *
 * The analysis is heuristic: it combines regular expressions with a small
 * parenthesis-aware scanner instead of building a full SQL grammar. It
 * understands nested parentheses and quoted strings, but it does not
 * resolve scopes, so clause extraction always looks at the first matching
 * clause in the text it is given.
 */
export class AdvancedSQLParser {
    /** Upper-cased SQL keywords that must never be reported as field names. */
    static #RESERVED_WORDS = new Set([
        'SELECT', 'FROM', 'WHERE', 'GROUP', 'BY', 'ORDER', 'HAVING',
        'AND', 'OR', 'NOT', 'IN', 'IS', 'NULL', 'LIKE', 'BETWEEN', 'EXISTS',
        'AS', 'ON', 'USING', 'JOIN', 'INNER', 'LEFT', 'RIGHT', 'FULL', 'OUTER', 'CROSS', 'NATURAL',
        'ASC', 'DESC', 'LIMIT', 'OFFSET', 'DISTINCT', 'UNION', 'ALL',
        'CASE', 'WHEN', 'THEN', 'ELSE', 'END',
        'WITH', 'RECURSIVE', 'OVER', 'PARTITION', 'ROWS', 'RANGE', 'GROUPS',
        'TRUE', 'FALSE',
        'INSERT', 'INTO', 'VALUES', 'UPDATE', 'SET', 'DELETE',
    ]);

    version = '1.0.0';

    constructor() {
        logger.info('AdvancedSQLParser initialized');
    }

    /**
     * Parse a SQL query with advanced analysis.
     *
     * @param {string} query - SQL text to analyse.
     * @param {string} primaryEntity - Entity name copied verbatim into the result.
     * @returns {object} The basic parse result extended with `ctes`,
     *   `windowFunctions`, `aggregates`, `dynamicReferences`,
     *   `complexityMetrics` and `optimizationOpportunities`.
     * @throws {Error} When any step fails; the original error is attached as `cause`.
     */
    parseQuery(query, primaryEntity) {
        const startTime = performance.now();
        try {
            logger.debug('Starting advanced SQL parsing');
            // Step 1: Basic query parsing (reuse from QueryAnalysisEngine logic)
            const basicParsed = this.performBasicParsing(query, primaryEntity);
            // Step 2: Extract CTEs
            const ctes = this.extractCTEs(query);
            // Step 3: Analyze window functions
            const windowFunctions = this.extractWindowFunctions(query);
            // Step 4: Analyze aggregate functions
            const aggregates = this.extractAggregates(query);
            // Step 5: Identify dynamic field references
            const dynamicReferences = this.extractDynamicReferences(query);
            // Step 6: Calculate complexity metrics
            const complexityMetrics = this.calculateComplexityMetrics(basicParsed, ctes, windowFunctions, aggregates, dynamicReferences);
            // Step 7: Identify optimization opportunities
            const optimizationOpportunities = this.identifyOptimizationOpportunities(basicParsed, complexityMetrics, dynamicReferences);
            const parseTime = performance.now() - startTime;
            const result = {
                ...basicParsed,
                ctes,
                windowFunctions,
                aggregates,
                dynamicReferences,
                complexityMetrics,
                optimizationOpportunities,
            };
            logger.info(`Advanced SQL parsing completed in ${parseTime}ms`);
            return result;
        }
        catch (error) {
            logger.error('Advanced SQL parsing failed');
            const reason = error instanceof Error ? error.message : String(error);
            // Keep the original error (and its stack) reachable for callers.
            throw new Error(`Advanced SQL parsing failed: ${reason}`, { cause: error });
        }
    }

    /**
     * Perform basic SQL parsing (similar to QueryAnalysisEngine).
     *
     * @param {string} query - SQL text.
     * @param {string} primaryEntity - Entity name copied into the result.
     * @returns {object} Operation type plus the field, join and subquery lists.
     */
    performBasicParsing(query, primaryEntity) {
        const normalizedQuery = query.trim().toUpperCase();
        // Anything that is not an explicit write statement is treated as SELECT.
        let operation = 'SELECT';
        if (normalizedQuery.startsWith('INSERT')) {
            operation = 'INSERT';
        }
        else if (normalizedQuery.startsWith('UPDATE')) {
            operation = 'UPDATE';
        }
        else if (normalizedQuery.startsWith('DELETE')) {
            operation = 'DELETE';
        }
        const referencedFields = this.extractBasicFieldReferences(query);
        return {
            originalQuery: query,
            operation,
            primaryEntity,
            referencedFields,
            filterFields: this.extractFilterFields(query),
            projectionFields: this.extractProjectionFields(query),
            groupByFields: this.extractGroupByFields(query),
            orderByFields: this.extractOrderByFields(query),
            joins: this.extractJoins(query),
            subqueries: this.extractSubqueries(query),
        };
    }

    /**
     * Extract Common Table Expressions from the first WITH clause.
     *
     * CTE bodies are delimited by matching parentheses, so bodies containing
     * function calls or nested subqueries are returned in full.
     *
     * @param {string} query - SQL text.
     * @returns {Array<object>} One `{ name, columns, query, referencedFields, isRecursive }` per CTE.
     */
    extractCTEs(query) {
        const ctes = [];
        const withMatch = /\bWITH\s+(RECURSIVE\s+)?/i.exec(query);
        if (!withMatch) {
            return ctes;
        }
        const isRecursive = Boolean(withMatch[1]);
        // Sticky patterns: each must match exactly at the scanner position.
        const headerPattern = /\s*(\w+)(?:\s*\(([^)]+)\))?\s+AS\s*(?:(?:NOT\s+)?MATERIALIZED\s*)?\(/iy;
        const separatorPattern = /\s*,/y;
        let cursor = withMatch.index + withMatch[0].length;
        while (cursor < query.length) {
            headerPattern.lastIndex = cursor;
            const header = headerPattern.exec(query);
            if (!header) {
                break;
            }
            const openIndex = headerPattern.lastIndex - 1;
            const closeIndex = this.#findClosingParen(query, openIndex);
            if (closeIndex === -1) {
                break; // Unbalanced body: nothing trustworthy left to read.
            }
            const cteQuery = query.slice(openIndex + 1, closeIndex).trim();
            const columnSpec = header[2];
            ctes.push({
                name: header[1],
                columns: columnSpec ? columnSpec.split(',').map((col) => col.trim()) : undefined,
                query: cteQuery,
                referencedFields: this.extractBasicFieldReferences(cteQuery),
                isRecursive,
            });
            // Another CTE follows only when the body is followed by a comma.
            separatorPattern.lastIndex = closeIndex + 1;
            if (separatorPattern.exec(query) === null) {
                break;
            }
            cursor = separatorPattern.lastIndex;
        }
        return ctes;
    }

    /**
     * Extract window functions (`fn(...) OVER (...)`) from the query.
     *
     * @param {string} query - SQL text.
     * @returns {Array<object>} One `{ functionName, partitionBy, orderBy, frameSpec, context }` per call.
     */
    extractWindowFunctions(query) {
        const windowFunctions = [];
        const callPattern = /\b(ROW_NUMBER|RANK|DENSE_RANK|LAG|LEAD|FIRST_VALUE|LAST_VALUE|COUNT|SUM|AVG|MIN|MAX)\s*\(/gi;
        const overPattern = /\s*OVER\s*\(/iy;
        let call;
        while ((call = callPattern.exec(query)) !== null) {
            const argsClose = this.#findClosingParen(query, callPattern.lastIndex - 1);
            if (argsClose === -1) {
                continue;
            }
            overPattern.lastIndex = argsClose + 1;
            if (overPattern.exec(query) === null) {
                continue; // A plain function call, not a window function.
            }
            const specOpen = overPattern.lastIndex - 1;
            const specClose = this.#findClosingParen(query, specOpen);
            if (specClose === -1) {
                continue;
            }
            const windowSpec = query.slice(specOpen + 1, specClose);
            // Each list runs until the next window-spec keyword or the end of the spec.
            const partitionMatch = windowSpec.match(/\bPARTITION\s+BY\s+([\s\S]+?)(?=\s+ORDER\s+BY\b|\s+(?:ROWS|RANGE|GROUPS)\b|\s*$)/i);
            const orderMatch = windowSpec.match(/\bORDER\s+BY\s+([\s\S]+?)(?=\s+(?:ROWS|RANGE|GROUPS)\b|\s*$)/i);
            const frameMatch = windowSpec.match(/\b(ROWS|RANGE|GROUPS)\s+([\s\S]+)/i);
            windowFunctions.push({
                functionName: call[1],
                partitionBy: partitionMatch ? this.#splitTopLevel(partitionMatch[1]) : [],
                orderBy: orderMatch ? this.#splitTopLevel(orderMatch[1]) : [],
                frameSpec: frameMatch ? this.parseWindowFrame(frameMatch[1], frameMatch[2]) : undefined,
                context: 'SELECT', // Simplified - would need context analysis
            });
        }
        return windowFunctions;
    }

    /**
     * Parse a window frame specification.
     *
     * @param {string} type - Frame unit keyword (ROWS, RANGE or GROUPS), any case.
     * @param {string} spec - Text following the frame unit.
     * @returns {{type: string, start: object, end: (object|undefined)}} Parsed frame;
     *   `start` defaults to UNBOUNDED PRECEDING when no boundary is found.
     */
    parseWindowFrame(type, spec) {
        const boundaryPattern = /(UNBOUNDED\s+PRECEDING|CURRENT\s+ROW|UNBOUNDED\s+FOLLOWING|\d+\s+PRECEDING|\d+\s+FOLLOWING)/gi;
        const boundaries = spec.match(boundaryPattern) ?? [];
        return {
            type: type.toUpperCase(),
            start: this.parseFrameBoundary(boundaries[0] ?? 'UNBOUNDED PRECEDING'),
            end: boundaries[1] ? this.parseFrameBoundary(boundaries[1]) : undefined,
        };
    }

    /**
     * Parse an individual frame boundary.
     *
     * @param {string} boundary - Boundary text such as `3 PRECEDING` or `CURRENT ROW`.
     * @returns {{type: string, offset?: number}} Boundary descriptor; unrecognised
     *   text falls back to `CURRENT_ROW`.
     */
    parseFrameBoundary(boundary) {
        // Collapse whitespace runs so "UNBOUNDED   PRECEDING" compares equal.
        const normalizedBoundary = boundary.toUpperCase().trim().replace(/\s+/g, ' ');
        const readOffset = () => Number.parseInt(normalizedBoundary.match(/\d+/)?.[0] ?? '0', 10);
        if (normalizedBoundary === 'UNBOUNDED PRECEDING') {
            return { type: 'UNBOUNDED_PRECEDING' };
        }
        if (normalizedBoundary === 'CURRENT ROW') {
            return { type: 'CURRENT_ROW' };
        }
        if (normalizedBoundary === 'UNBOUNDED FOLLOWING') {
            return { type: 'UNBOUNDED_FOLLOWING' };
        }
        if (normalizedBoundary.includes('PRECEDING')) {
            return { type: 'PRECEDING', offset: readOffset() };
        }
        if (normalizedBoundary.includes('FOLLOWING')) {
            return { type: 'FOLLOWING', offset: readOffset() };
        }
        return { type: 'CURRENT_ROW' };
    }

    /**
     * Extract aggregate function calls from the query.
     *
     * Arguments are read up to the matching closing parenthesis and split on
     * top-level commas only, so nested calls stay intact.
     *
     * @param {string} query - SQL text.
     * @returns {Array<object>} One `{ functionName, arguments, distinct, filter, referencedFields }` per call.
     */
    extractAggregates(query) {
        const aggregates = [];
        // \b keeps names such as DISCOUNT( from being read as COUNT(.
        const aggregatePattern = /\b(COUNT|SUM|AVG|MIN|MAX|ARRAY_AGG|STRING_AGG|LISTAGG)\s*\(/gi;
        const filterPattern = /\s*FILTER\s*\(\s*WHERE\s+/iy;
        let match;
        while ((match = aggregatePattern.exec(query)) !== null) {
            const openIndex = aggregatePattern.lastIndex - 1;
            const closeIndex = this.#findClosingParen(query, openIndex);
            if (closeIndex === -1) {
                continue;
            }
            const inner = query.slice(openIndex + 1, closeIndex).trim();
            if (inner.length === 0) {
                continue;
            }
            const distinctPrefix = /^DISTINCT\s+/i.exec(inner);
            const args = distinctPrefix ? inner.slice(distinctPrefix[0].length) : inner;
            let filter;
            filterPattern.lastIndex = closeIndex + 1;
            if (filterPattern.exec(query) !== null) {
                const conditionStart = filterPattern.lastIndex;
                const filterOpen = query.indexOf('(', closeIndex + 1);
                const filterClose = this.#findClosingParen(query, filterOpen);
                if (filterClose !== -1) {
                    filter = query.slice(conditionStart, filterClose).trim();
                }
            }
            aggregates.push({
                functionName: match[1],
                arguments: this.#splitTopLevel(args),
                distinct: Boolean(distinctPrefix),
                filter,
                referencedFields: this.extractFieldsFromExpression(args),
            });
        }
        return aggregates;
    }

    /**
     * Extract dynamic field references that need runtime resolution:
     * JSON_EXTRACT calls, `name[index].field` accesses and CASE expressions.
     *
     * For CASE expressions only the first WHEN branch and the ELSE value are
     * recorded; nested CASE expressions are not supported.
     *
     * @param {string} query - SQL text.
     * @returns {Array<object>} Dynamic reference descriptors in discovery order
     *   (all JSON_EXTRACT, then array accesses, then CASE expressions).
     */
    extractDynamicReferences(query) {
        const dynamicRefs = [];
        let match;
        // JSON_EXTRACT patterns
        const jsonExtractPattern = /JSON_EXTRACT\s*\(\s*([^,]+),\s*['"]\$\.([^'"]+)['"]\s*\)/gi;
        while ((match = jsonExtractPattern.exec(query)) !== null) {
            dynamicRefs.push({
                expression: match[0],
                type: 'JSON_EXTRACT',
                baseEntity: match[1].trim(),
                pathComponents: match[2].split(/[\.\[\]]+/).filter((comp) => comp.length > 0),
            });
        }
        // Array element access patterns
        const arrayElementPattern = /(\w+)\s*\[\s*(\d+|\*)\s*\]\.(\w+)/gi;
        while ((match = arrayElementPattern.exec(query)) !== null) {
            dynamicRefs.push({
                expression: match[0],
                type: 'ARRAY_ELEMENT',
                baseEntity: match[1],
                pathComponents: [match[2], match[3]],
            });
        }
        // CASE expressions (conditional fields): first WHEN/THEN pair, then
        // everything up to the first END (further WHEN branches and ELSE).
        const casePattern = /\bCASE\s+WHEN\s+([\s\S]+?)\s+THEN\s+([\s\S]+?)(?=\s+(?:WHEN|ELSE|END)\b)([\s\S]*?)\s+END\b/gi;
        while ((match = casePattern.exec(query)) !== null) {
            const elseMatch = /\bELSE\s+([\s\S]+)$/i.exec(match[3]);
            dynamicRefs.push({
                expression: match[0],
                type: 'CONDITIONAL_FIELD',
                baseEntity: 'unknown',
                pathComponents: [],
                conditions: [{
                        condition: match[1],
                        trueField: match[2],
                        falseField: elseMatch ? elseMatch[1].trim() : undefined,
                    }],
            });
        }
        return dynamicRefs;
    }

    /**
     * Calculate comprehensive query complexity metrics.
     *
     * @param {object} basicParsed - Result of `performBasicParsing`.
     * @param {Array} ctes - Result of `extractCTEs`.
     * @param {Array} windowFunctions - Result of `extractWindowFunctions`.
     * @param {Array} aggregates - Result of `extractAggregates`.
     * @param {Array} dynamicReferences - Result of `extractDynamicReferences`.
     * @returns {object} Counts, an estimated cartesian size and a score capped at 100.
     */
    calculateComplexityMetrics(basicParsed, ctes, windowFunctions, aggregates, dynamicReferences) {
        // Each distinct qualifier of a `qualifier.field` reference counts as one table.
        const tableNames = new Set();
        for (const field of basicParsed.referencedFields) {
            if (field.includes('.')) {
                tableNames.add(field.split('.')[0]);
            }
        }
        const tableCount = Math.max(1, tableNames.size);
        const joinCount = basicParsed.joins.length;
        const maxSubqueryDepth = this.calculateSubqueryDepth(basicParsed.subqueries);
        const aggregateCount = aggregates.length;
        const windowFunctionCount = windowFunctions.length;
        const dynamicFieldCount = dynamicReferences.length;
        // Estimate cartesian product size
        const baseSize = 1000; // Assumed average table size
        const estimatedCartesianSize = Math.pow(baseSize, tableCount);
        // Weighted sum of the individual counts, capped at 100.
        const rawScore = tableCount * 5 +
            joinCount * 10 +
            maxSubqueryDepth * 15 +
            aggregateCount * 8 +
            windowFunctionCount * 12 +
            dynamicFieldCount * 20 +
            ctes.length * 10;
        const complexityScore = Math.min(100, rawScore);
        return {
            tableCount,
            joinCount,
            maxSubqueryDepth,
            aggregateCount,
            windowFunctionCount,
            dynamicFieldCount,
            estimatedCartesianSize,
            complexityScore,
        };
    }

    /**
     * Identify optimization opportunities.
     *
     * @param {object} basicParsed - Result of `performBasicParsing`.
     * @param {object} complexityMetrics - Result of `calculateComplexityMetrics`.
     * @param {Array} dynamicReferences - Result of `extractDynamicReferences`.
     * @returns {Array<object>} Suggestions, possibly empty.
     */
    identifyOptimizationOpportunities(basicParsed, complexityMetrics, dynamicReferences) {
        const opportunities = [];
        // Check for excessive JOINs
        if (complexityMetrics.joinCount > 5) {
            opportunities.push({
                type: 'QUERY_REWRITE',
                impact: 'HIGH',
                description: 'Query has excessive JOIN operations that may impact performance',
                suggestion: 'Consider denormalizing frequently accessed data or using materialized views',
                estimatedImprovement: '40-60% reduction in execution time',
                affectedElements: basicParsed.joins.map((join) => join.joinedEntity),
            });
        }
        // Check for cartesian product risk
        if (complexityMetrics.estimatedCartesianSize > 1000000) {
            opportunities.push({
                type: 'QUERY_REWRITE',
                impact: 'CRITICAL',
                description: 'Query may produce extremely large cartesian product',
                suggestion: 'Add more specific WHERE conditions or restructure joins',
                estimatedImprovement: '90% reduction in execution time and resource usage',
                affectedElements: ['JOIN conditions', 'WHERE clause'],
            });
        }
        // Check for missing indexes on frequently filtered fields
        if (basicParsed.filterFields.length > 0) {
            opportunities.push({
                type: 'INDEX_RECOMMENDATION',
                impact: 'MEDIUM',
                description: 'Filtered fields may benefit from database indexes',
                suggestion: 'Consider adding indexes on frequently filtered columns',
                estimatedImprovement: '20-40% improvement in query speed',
                affectedElements: basicParsed.filterFields,
            });
        }
        // Check for caching opportunities
        if (complexityMetrics.complexityScore > 60 && dynamicReferences.length === 0) {
            opportunities.push({
                type: 'CACHING_OPPORTUNITY',
                impact: 'MEDIUM',
                description: 'Complex query with static results could benefit from caching',
                suggestion: 'Implement query result caching with appropriate TTL',
                estimatedImprovement: '80% improvement for repeated queries',
                affectedElements: ['Query results'],
            });
        }
        return opportunities;
    }

    // Utility methods (simplified implementations)

    /**
     * Collect identifier-like tokens from a SQL fragment.
     *
     * Reports `qualifier.name` pairs and bare words that are followed by
     * whitespace, a comma, a closing parenthesis or the end of the text.
     * Single-quoted string literals are blanked first, and SQL keywords and
     * numeric literals are discarded. Table names and aliases are still
     * reported, because no scope analysis is done.
     *
     * @param {string} query - SQL text or fragment.
     * @returns {string[]} Unique tokens in order of first appearance.
     */
    extractBasicFieldReferences(query) {
        // Words inside string literals (including JSON paths) are not fields.
        const withoutLiterals = query.replace(/'(?:[^']|'')*'/g, "''");
        const fieldPattern = /\b\w+\.\w+|\b\w+(?=[,\s)]|$)/g;
        const candidates = withoutLiterals.match(fieldPattern) ?? [];
        const fields = candidates.filter((token) => !/^\d/.test(token) &&
            !AdvancedSQLParser.#RESERVED_WORDS.has(token.toUpperCase()));
        return [...new Set(fields)];
    }

    /**
     * Extract fields referenced by the first WHERE clause.
     *
     * @param {string} query - SQL text.
     * @returns {string[]} Field tokens, or an empty array without a WHERE clause.
     */
    extractFilterFields(query) {
        // The clause ends at the next clause keyword or at the end of the text.
        const whereMatch = query.match(/\bWHERE\s+([\s\S]*?)(?=\s+(?:GROUP\s+BY|ORDER\s+BY|HAVING|LIMIT|UNION)\b|\s*$)/i);
        return whereMatch ? this.extractBasicFieldReferences(`WHERE ${whereMatch[1]}`) : [];
    }

    /**
     * Extract fields from the first SELECT ... FROM projection list.
     *
     * @param {string} query - SQL text.
     * @returns {string[]} Field tokens, or an empty array when no projection is found.
     */
    extractProjectionFields(query) {
        const selectMatch = query.match(/\bSELECT\s+([\s\S]*?)\s+FROM\b/i);
        return selectMatch ? this.extractBasicFieldReferences(`SELECT ${selectMatch[1]}`) : [];
    }

    /**
     * Extract fields from the first GROUP BY clause.
     *
     * @param {string} query - SQL text.
     * @returns {string[]} Field tokens, or an empty array without a GROUP BY clause.
     */
    extractGroupByFields(query) {
        const groupByMatch = query.match(/\bGROUP\s+BY\s+([\s\S]*?)(?=\s+(?:ORDER\s+BY|HAVING|LIMIT|UNION)\b|\s*$)/i);
        return groupByMatch ? this.extractBasicFieldReferences(`GROUP BY ${groupByMatch[1]}`) : [];
    }

    /**
     * Extract fields from the first ORDER BY clause.
     *
     * @param {string} query - SQL text.
     * @returns {string[]} Field tokens, or an empty array without an ORDER BY clause.
     */
    extractOrderByFields(query) {
        const orderByMatch = query.match(/\bORDER\s+BY\s+([\s\S]*?)(?=\s+(?:LIMIT|OFFSET|UNION)\b|\s*$)/i);
        return orderByMatch ? this.extractBasicFieldReferences(`ORDER BY ${orderByMatch[1]}`) : [];
    }

    /**
     * Extract `JOIN <table> [AS alias] ON <condition>` clauses.
     *
     * Joins without an ON condition (for example CROSS JOIN) are not reported.
     *
     * @param {string} query - SQL text.
     * @returns {Array<{type: string, joinedEntity: string, joinFields: string[]}>}
     *   One entry per join; `type` is INNER when no join type keyword is present.
     */
    extractJoins(query) {
        const joins = [];
        const joinPattern = /\b(?:(INNER|LEFT|RIGHT|FULL)(?:\s+OUTER)?\s+)?JOIN\s+(\w+(?:\.\w+)*)(?:\s+(?:AS\s+)?(\w+))?\s+ON\s+([\s\S]+?)(?=\s+(?:(?:INNER|LEFT|RIGHT|FULL|CROSS|NATURAL)\b|JOIN\b|WHERE\b|GROUP\s+BY\b|ORDER\s+BY\b|HAVING\b|LIMIT\b|UNION\b)|\s*$)/gi;
        let match;
        while ((match = joinPattern.exec(query)) !== null) {
            joins.push({
                type: (match[1] ?? 'INNER').toUpperCase(),
                joinedEntity: match[2],
                joinFields: this.extractBasicFieldReferences(`ON ${match[4]}`),
            });
        }
        return joins;
    }

    /**
     * Extract parenthesised SELECT subqueries and parse each one recursively.
     *
     * Only outermost subqueries appear in the returned list; nested ones are
     * found under each entry's own `subqueries`.
     *
     * @param {string} query - SQL text.
     * @returns {Array<object>} Basic parse results with `primaryEntity` set to 'unknown'.
     */
    extractSubqueries(query) {
        const subqueries = [];
        const openerPattern = /\(\s*SELECT\b/gi;
        let opener;
        while ((opener = openerPattern.exec(query)) !== null) {
            const closeIndex = this.#findClosingParen(query, opener.index);
            if (closeIndex === -1) {
                continue; // Unbalanced; later candidates may still be well formed.
            }
            const innerQuery = query.slice(opener.index + 1, closeIndex).trim();
            try {
                subqueries.push(this.performBasicParsing(innerQuery, 'unknown'));
            }
            catch (error) {
                // Best effort: a malformed subquery must not abort the whole parse.
                logger.debug('Skipping malformed subquery');
            }
            // Resume after this subquery so its children are not listed twice.
            openerPattern.lastIndex = closeIndex + 1;
        }
        return subqueries;
    }

    /**
     * Extract field references from an arbitrary SQL expression.
     *
     * @param {string} expression - SQL expression text.
     * @returns {string[]} Field tokens found by `extractBasicFieldReferences`.
     */
    extractFieldsFromExpression(expression) {
        return this.extractBasicFieldReferences(expression);
    }

    /**
     * Compute the maximum nesting depth of a subquery tree.
     *
     * @param {Array<object>} subqueries - Parsed subqueries, each with a `subqueries` list.
     * @returns {number} 0 for an empty list, otherwise 1 plus the deepest child depth.
     */
    calculateSubqueryDepth(subqueries) {
        if (subqueries.length === 0) {
            return 0;
        }
        const childDepths = subqueries.map((subquery) => this.calculateSubqueryDepth(subquery.subqueries ?? []));
        return 1 + Math.max(...childDepths);
    }

    /**
     * Find the parenthesis that closes the one at `openIndex`.
     * Parentheses inside quoted strings or quoted identifiers are ignored.
     *
     * @param {string} text - Text to scan.
     * @param {number} openIndex - Index of the opening parenthesis.
     * @returns {number} Index of the matching `)`, or -1 when unbalanced.
     */
    #findClosingParen(text, openIndex) {
        let depth = 0;
        let activeQuote = null;
        for (let i = openIndex; i < text.length; i++) {
            const ch = text[i];
            if (activeQuote !== null) {
                // A doubled quote closes and immediately reopens, which nets out.
                if (ch === activeQuote) {
                    activeQuote = null;
                }
                continue;
            }
            if (ch === "'" || ch === '"' || ch === '`') {
                activeQuote = ch;
            }
            else if (ch === '(') {
                depth += 1;
            }
            else if (ch === ')') {
                depth -= 1;
                if (depth === 0) {
                    return i;
                }
            }
        }
        return -1;
    }

    /**
     * Split a list on commas that are outside parentheses and quotes.
     *
     * @param {string} text - Comma-separated list.
     * @returns {string[]} Trimmed list items.
     */
    #splitTopLevel(text) {
        const parts = [];
        let depth = 0;
        let activeQuote = null;
        let partStart = 0;
        for (let i = 0; i < text.length; i++) {
            const ch = text[i];
            if (activeQuote !== null) {
                if (ch === activeQuote) {
                    activeQuote = null;
                }
                continue;
            }
            if (ch === "'" || ch === '"' || ch === '`') {
                activeQuote = ch;
            }
            else if (ch === '(') {
                depth += 1;
            }
            else if (ch === ')') {
                depth -= 1;
            }
            else if (ch === ',' && depth === 0) {
                parts.push(text.slice(partStart, i));
                partStart = i + 1;
            }
        }
        parts.push(text.slice(partStart));
        return parts.map((part) => part.trim());
    }
}
//# sourceMappingURL=AdvancedSQLParser.js.map