claude-flow
Version:
Ruflo - Enterprise AI agent orchestration for Claude Code. Deploy 60+ specialized agents in coordinated swarms with self-learning, fault-tolerant consensus, vector memory, and MCP integration
1,690 lines (1,509 loc) • 70.8 kB
text/typescript
/**
* RuVector Self-Learning Optimization Module
*
* SONA-inspired self-learning features for the RuVector PostgreSQL Bridge.
* Implements adaptive query optimization, intelligent index tuning,
* pattern recognition, and continuous learning with EWC++ protection.
*
* @module @claude-flow/plugins/integrations/ruvector/self-learning
* @version 1.0.0
*/
import type {
VectorIndexType,
DistanceMetric,
IndexStats,
QueryStats,
VectorSearchOptions,
} from './types.js';
// ============================================================================
// Query Analysis Types
// ============================================================================
/**
* Analysis result for a SQL query.
*
* This is the shape returned by `QueryOptimizer.analyzeQuery`. That method
* derives every field from the query text with regex heuristics rather than
* a real SQL parser, so the contents are best-effort.
*/
export interface QueryAnalysis {
/** Original SQL query, exactly as passed to the analyzer */
readonly sql: string;
/** Query type (SELECT, INSERT, UPDATE, DELETE), or UNKNOWN if unrecognized */
readonly queryType: QueryType;
/** Tables referenced in the query (de-duplicated, heuristically extracted) */
readonly tables: string[];
/** Columns referenced in the select list; empty for `SELECT *` */
readonly columns: string[];
/** Vector operations detected */
readonly vectorOperations: VectorOperation[];
/** Estimated complexity score (0-1); the analyzer caps the heuristic sum at 1 */
readonly complexity: number;
/** Index usage hints */
readonly indexHints: IndexHint[];
/** Potential bottlenecks */
readonly bottlenecks: Bottleneck[];
/** Time spent producing this analysis, in milliseconds */
readonly parseTimeMs: number;
/**
* Query fingerprint for deduplication. Literals and numbers are normalized
* before hashing, so queries that differ only in those share a fingerprint.
*/
readonly fingerprint: string;
}
/**
* Query types supported.
*
* `'UNKNOWN'` is what `QueryOptimizer.parseQueryType` returns when the trimmed
* statement does not start with one of the four keywords.
*/
export type QueryType = 'SELECT' | 'INSERT' | 'UPDATE' | 'DELETE' | 'UNKNOWN';
/**
* Vector operation in a query.
*
* The `QueryOptimizer` code in this file only ever produces `'search'` and
* `'aggregate'` operations; the other variants are declared but not emitted
* by the code visible here.
*/
export interface VectorOperation {
/** Operation type */
readonly type: 'search' | 'insert' | 'update' | 'aggregate' | 'distance';
/** Table name (`'unknown'` when the analyzer could not find a table) */
readonly table: string;
/** Column name */
readonly column: string;
/** Distance metric used (`<->` euclidean, `<=>` cosine, `<#>` dot) */
readonly metric?: DistanceMetric;
/** K value for KNN; the analyzer takes it from LIMIT and defaults to 10 */
readonly k?: number;
/** Estimated cost (unitless heuristic: 100 for searches, 50 for aggregates) */
readonly estimatedCost: number;
}
/**
* Index usage hint.
*
* `QueryOptimizer.suggestOptimizations` only turns a hint into an
* `index_usage` optimization when `confidence` is strictly greater than 0.7.
*/
export interface IndexHint {
/** Recommended index type */
readonly indexType: VectorIndexType;
/** Table name */
readonly table: string;
/** Column name */
readonly column: string;
/** Confidence score (0-1) */
readonly confidence: number;
/** Expected speedup factor (multiplied by 100 to report an improvement %) */
readonly expectedSpeedup: number;
}
/**
* Query bottleneck.
*
* `QueryOptimizer.detectBottlenecks` emits `full_scan` (severity 7),
* `missing_index` (severity 8) and `cartesian_product` (severity 9); the
* remaining variants are not produced by the code visible in this file.
*/
export interface Bottleneck {
/** Bottleneck type */
readonly type: 'full_scan' | 'missing_index' | 'cartesian_product' | 'large_sort' | 'expensive_function';
/** Human-readable description */
readonly description: string;
/** Severity (1-10), higher is worse */
readonly severity: number;
/** Suggested fix (free text or a SQL statement) */
readonly suggestion: string;
}
/**
* Query optimization suggestion.
*
* Suggestions are advisory data: `optimized` may hold a full rewritten query,
* a standalone statement (e.g. `CREATE INDEX ...` / `SET ...`), or free-text
* guidance, depending on `type`.
*/
export interface Optimization {
/** Optimization type */
readonly type: OptimizationType;
/** Description of the optimization */
readonly description: string;
/** Original query fragment (empty string when nothing is being replaced) */
readonly original: string;
/** Optimized query fragment */
readonly optimized: string;
/** Expected improvement percentage */
readonly expectedImprovement: number;
/** Confidence score (0-1) */
readonly confidence: number;
/** Risk level */
readonly risk: 'low' | 'medium' | 'high';
/** Whether the producer considers this safe to apply automatically */
readonly autoApply: boolean;
}
/**
* Types of query optimizations.
*
* `QueryOptimizer.suggestOptimizations` currently emits `index_usage`,
* `parameter_tuning`, `query_rewrite`, `batching` and `projection_pushdown`.
*/
export type OptimizationType =
| 'index_usage'
| 'query_rewrite'
| 'parameter_tuning'
| 'caching'
| 'batching'
| 'projection_pushdown'
| 'filter_pushdown'
| 'limit_pushdown'
| 'parallel_execution';
/**
* Query execution statistics.
*
* `QueryOptimizer` keeps one record per query fingerprint and replaces it
* with a fresh object on every recorded execution.
*/
export interface QueryExecutionStats {
/** Query fingerprint */
readonly fingerprint: string;
/** SQL query (the text of the most recently recorded execution) */
readonly sql: string;
/** Execution count */
readonly executionCount: number;
/** Total execution time (ms) */
readonly totalDurationMs: number;
/** Average execution time (ms): totalDurationMs / executionCount */
readonly avgDurationMs: number;
/** Min execution time (ms) */
readonly minDurationMs: number;
/** Max execution time (ms) */
readonly maxDurationMs: number;
/** P95 execution time (ms) */
readonly p95DurationMs: number;
/** P99 execution time (ms) */
readonly p99DurationMs: number;
/** Total rows returned */
readonly totalRows: number;
/** Average rows per execution: totalRows / executionCount */
readonly avgRows: number;
/** Last executed timestamp */
readonly lastExecuted: Date;
/** First executed timestamp */
readonly firstExecuted: Date;
/** Error count (starts at 0; `QueryOptimizer` only carries it forward) */
readonly errorCount: number;
}
// ============================================================================
// Index Tuning Types
// ============================================================================
/**
* Workload analysis result.
*
* This is the shape returned by `IndexTuner.analyzeWorkload`, which computes
* it from the tuner's recorded query history.
*/
export interface WorkloadAnalysis {
/** Analysis timestamp (taken when the analysis started) */
readonly timestamp: Date;
/** Analysis duration (ms) */
readonly durationMs: number;
/** Total queries analyzed */
readonly totalQueries: number;
/** Query type distribution (count of history entries per query type) */
readonly queryDistribution: Map<QueryType, number>;
/** Most frequent query patterns (at most 10, most frequent first) */
readonly topPatterns: QueryPattern[];
/** Hot tables (at most 10, ordered by reads + writes, descending) */
readonly hotTables: TableAccess[];
/** Index usage summary */
readonly indexUsage: IndexUsageSummary[];
/** Workload characteristics */
readonly characteristics: WorkloadCharacteristics;
/** Recommendations */
readonly recommendations: WorkloadRecommendation[];
}
/**
* Query pattern from workload analysis.
*
* One entry per distinct query fingerprint seen in the analyzed history.
*/
export interface QueryPattern {
/** Pattern fingerprint */
readonly fingerprint: string;
/** Example query (first history entry with this fingerprint, or '' if none) */
readonly example: string;
/** Execution frequency (number of history entries with this fingerprint) */
readonly frequency: number;
/** Average duration (ms) */
readonly avgDurationMs: number;
/** Tables involved (extracted from the example query) */
readonly tables: string[];
/** Is vector search (determined from the example query) */
readonly isVectorSearch: boolean;
}
/**
* Table access statistics.
*
* Accumulated per table by `IndexTuner.analyzeWorkload`.
*/
export interface TableAccess {
/** Table name */
readonly tableName: string;
/** Read count (SELECT statements touching the table) */
readonly reads: number;
/** Write count (INSERT / UPDATE / DELETE statements touching the table) */
readonly writes: number;
/** Vector search count */
readonly vectorSearches: number;
/** Average scan size (initialized to 0; not updated by the code visible here) */
readonly avgScanSize: number;
/** Is frequently accessed: reads + writes exceed 10% of all analyzed queries */
readonly isHot: boolean;
}
/**
* Index usage summary.
*
* One entry per index, reported in `WorkloadAnalysis.indexUsage`.
* NOTE(review): the counter fields look like PostgreSQL index statistics
* (scans / tuples read / tuples fetched) — confirm against the producer.
*/
export interface IndexUsageSummary {
/** Index name */
readonly indexName: string;
/** Table name */
readonly tableName: string;
/** Index type */
readonly indexType: VectorIndexType;
/** Scan count */
readonly scanCount: number;
/** Tuple reads */
readonly tupleReads: number;
/** Tuple fetches */
readonly tupleFetches: number;
/** Is underutilized */
readonly isUnderutilized: boolean;
/** Recommended action for this index */
readonly recommendation: 'keep' | 'drop' | 'rebuild' | 'tune';
}
/**
* Workload characteristics.
*
* As computed by `IndexTuner.analyzeWorkload`, "reads" are SELECT statements
* and "writes" are INSERT + UPDATE + DELETE statements.
*/
export interface WorkloadCharacteristics {
/** Read/write ratio (reads / writes; equals the read count when there are no writes) */
readonly readWriteRatio: number;
/** Vector search percentage (0-100) */
readonly vectorSearchPercentage: number;
/** Average query complexity */
readonly avgComplexity: number;
/** Peak hours (0-23) */
readonly peakHours: number[];
/** Is OLTP-like: reads < 3 x writes */
readonly isOLTP: boolean;
/** Is OLAP-like: reads > 10 x writes */
readonly isOLAP: boolean;
/** Is hybrid: 3 x writes <= reads <= 10 x writes */
readonly isHybrid: boolean;
}
/**
* Workload-based recommendation.
*
* Reported in `WorkloadAnalysis.recommendations`.
*/
export interface WorkloadRecommendation {
/** Recommendation type */
readonly type: 'create_index' | 'drop_index' | 'tune_parameter' | 'partition_table' | 'materialize_view';
/** Priority (1-10) */
readonly priority: number;
/** Description */
readonly description: string;
/** Estimated impact (free text) */
readonly estimatedImpact: string;
/** SQL to execute, when the recommendation maps to a concrete statement */
readonly sql?: string;
}
/**
* Index suggestion.
*
* Produced by `IndexTuner.suggestIndexes`. Its HNSW suggestions populate `m`
* and `efConstruction`; its IVFFlat suggestions populate `lists`.
*/
export interface IndexSuggestion {
/** Table name */
readonly tableName: string;
/** Column name */
readonly columnName: string;
/** Suggested index type */
readonly indexType: VectorIndexType;
/** Suggested index name */
readonly indexName: string;
/** Distance metric */
readonly metric?: DistanceMetric;
/** HNSW M parameter (HNSW suggestions only) */
readonly m?: number;
/** HNSW ef_construction (HNSW suggestions only) */
readonly efConstruction?: number;
/** IVF lists (IVFFlat suggestions only) */
readonly lists?: number;
/** Confidence score (0-1) */
readonly confidence: number;
/** Expected improvement */
readonly expectedImprovement: number;
/** Rationale (human-readable explanation of why the index is suggested) */
readonly rationale: string;
/** CREATE INDEX SQL */
readonly createSql: string;
}
/**
* HNSW parameters.
*
* Returned by `IndexTuner.tuneHNSW`. When the tuner has no vector-search
* history for the table it returns the balanced defaults m = 16,
* efConstruction = 64, efSearch = 40 with confidence 0.5.
*/
export interface HNSWParams {
/** M parameter (connections per layer) */
readonly m: number;
/** ef_construction parameter */
readonly efConstruction: number;
/** ef_search parameter */
readonly efSearch: number;
/** Which goal these parameters were chosen for */
readonly optimizedFor: 'recall' | 'speed' | 'balanced';
/** Tuning confidence (0-1) */
readonly confidence: number;
/** Estimated recall (fraction, e.g. 0.95) */
readonly estimatedRecall: number;
/** Estimated QPS */
readonly estimatedQps: number;
}
// ============================================================================
// Pattern Recognition Types
// ============================================================================
/**
* Query history entry.
*
* One record per executed query; `IndexTuner` keeps an array of these as its
* workload history and analyzes the `sql` text of each entry.
*/
export interface QueryHistory {
/** Query fingerprint */
readonly fingerprint: string;
/** SQL query */
readonly sql: string;
/** Execution timestamp */
readonly timestamp: Date;
/** Duration (ms) */
readonly durationMs: number;
/** Rows returned */
readonly rowCount: number;
/** Was successful */
readonly success: boolean;
/** User/session ID, if known */
readonly sessionId?: string;
/** Context metadata (arbitrary key/value pairs) */
readonly context?: Record<string, unknown>;
}
/**
* Detected query pattern.
*
* Combines identification (`id`, `type`, `signature`), evidence
* (`occurrences`, `examples`) and optional temporal / mandatory performance
* characteristics for one recognized pattern.
*/
export interface Pattern {
/** Pattern ID */
readonly id: string;
/** Pattern type */
readonly type: PatternType;
/** Pattern signature */
readonly signature: string;
/** Description */
readonly description: string;
/** Confidence score (0-1) */
readonly confidence: number;
/** Occurrence count */
readonly occurrences: number;
/** Example queries matching this pattern */
readonly examples: string[];
/** Temporal characteristics (absent when none were derived) */
readonly temporal?: TemporalPattern;
/** Performance characteristics */
readonly performance: PerformancePattern;
/** First detected */
readonly firstDetected: Date;
/** Last detected */
readonly lastDetected: Date;
}
/**
* Pattern types.
*
* Discriminator for `Pattern.type`. The members mix access-shape patterns
* (e.g. `sequential_access`, `similarity_search`), timing patterns
* (`periodic`, `burst`) and a health signal (`degrading_performance`).
*/
export type PatternType =
| 'sequential_access'
| 'random_access'
| 'bulk_insert'
| 'bulk_update'
| 'similarity_search'
| 'range_query'
| 'aggregation'
| 'join_pattern'
| 'periodic'
| 'burst'
| 'degrading_performance';
/**
* Temporal pattern characteristics.
*
* Optional timing information attached to a `Pattern` via `Pattern.temporal`.
*/
export interface TemporalPattern {
/** Is periodic */
readonly isPeriodic: boolean;
/** Period in seconds (only meaningful when `isPeriodic` is true) */
readonly periodSeconds?: number;
/** Peak times (hour of day) */
readonly peakHours: number[];
/** Trend direction */
readonly trend: 'increasing' | 'decreasing' | 'stable' | 'volatile';
/** Seasonality detected */
readonly hasSeasonality: boolean;
}
/**
* Performance pattern.
*
* Response-time characteristics attached to a `Pattern` via
* `Pattern.performance`.
*/
export interface PerformancePattern {
/** Average response time trend */
readonly responseTrend: 'improving' | 'degrading' | 'stable';
/** Variance coefficient */
readonly varianceCoefficient: number;
/** Has outliers */
readonly hasOutliers: boolean;
/** Percentile distribution (units not stated here; presumably ms — TODO confirm) */
readonly percentiles: {
readonly p50: number;
readonly p75: number;
readonly p90: number;
readonly p95: number;
readonly p99: number;
};
}
/**
* Query prediction context.
*
* Snapshot of what is known at prediction time: the recent query
* fingerprints, the current time, and optional session / user / application
* metadata.
*/
export interface Context {
/** Current session ID */
readonly sessionId?: string;
/** Recent query fingerprints */
readonly recentQueries: string[];
/** Current time */
readonly timestamp: Date;
/** User context (arbitrary key/value pairs) */
readonly userContext?: Record<string, unknown>;
/** Application context (arbitrary key/value pairs) */
readonly appContext?: Record<string, unknown>;
}
/**
* Query anomaly.
*
* Describes one detected deviation for a query: what was expected, what was
* observed, how severe it is, and what to do about it.
*/
export interface Anomaly {
/** Anomaly ID */
readonly id: string;
/** Anomaly type */
readonly type: AnomalyType;
/** Affected query */
readonly query: string;
/** Query fingerprint */
readonly fingerprint: string;
/** Detection timestamp */
readonly timestamp: Date;
/** Severity (1-10) */
readonly severity: number;
/** Description */
readonly description: string;
/** Expected value (same unit as `actual`; the unit depends on the anomaly type) */
readonly expected: number;
/** Actual value */
readonly actual: number;
/** Deviation from normal */
readonly deviation: number;
/** Possible causes */
readonly possibleCauses: string[];
/** Recommended actions */
readonly recommendations: string[];
}
/**
* Anomaly types.
*
* Discriminator for `Anomaly.type`.
*/
export type AnomalyType =
| 'slow_query'
| 'high_resource_usage'
| 'unusual_pattern'
| 'error_spike'
| 'traffic_anomaly'
| 'data_drift'
| 'index_degradation'
| 'cardinality_change';
// ============================================================================
// Learning System Types
// ============================================================================
/**
* Learning configuration.
*
* `QueryOptimizer` accepts a `Partial<LearningConfig>` and fills in the
* defaults noted on each field below.
*/
export interface LearningConfig {
/** Enable micro-learning (QueryOptimizer default: true) */
readonly enableMicroLearning: boolean;
/**
* Micro-learning threshold (ms). QueryOptimizer runs its micro-learning hook
* only for executions faster than this (default: 0.1).
*/
readonly microLearningThresholdMs: number;
/** Enable background learning (QueryOptimizer default: true) */
readonly enableBackgroundLearning: boolean;
/** Background learning interval (ms) (QueryOptimizer default: 60000) */
readonly backgroundLearningIntervalMs: number;
/** Enable EWC++ (QueryOptimizer default: true) */
readonly enableEWC: boolean;
/** EWC lambda (regularization strength) (QueryOptimizer default: 0.5) */
readonly ewcLambda: number;
/** Maximum patterns to retain (QueryOptimizer default: 10000) */
readonly maxPatterns: number;
/** Pattern expiry time (ms) (QueryOptimizer default: 86400000, i.e. 24 hours) */
readonly patternExpiryMs: number;
/** Learning rate (QueryOptimizer default: 0.01) */
readonly learningRate: number;
/** Momentum (QueryOptimizer default: 0.9) */
readonly momentum: number;
}
/**
* Learning statistics.
*
* Read-only counters and gauges describing the learning subsystem's activity.
*/
export interface LearningStats {
/** Total patterns learned */
readonly totalPatterns: number;
/** Active patterns */
readonly activePatterns: number;
/** Expired patterns */
readonly expiredPatterns: number;
/** Micro-learning events */
readonly microLearningEvents: number;
/** Background learning cycles */
readonly backgroundLearningCycles: number;
/** EWC consolidations */
readonly ewcConsolidations: number;
/** Average learning time (ms) */
readonly avgLearningTimeMs: number;
/** Memory usage (bytes) */
readonly memoryUsageBytes: number;
/** Last learning timestamp */
readonly lastLearningTimestamp: Date;
}
/**
* EWC++ state for preventing catastrophic forgetting.
*
* NOTE(review): both maps are keyed by string; presumably the key is a
* parameter name shared between `fisherDiagonal` and `previousParams` —
* confirm against the code that populates them.
*/
export interface EWCState {
/** Fisher information matrix (diagonal approximation) */
readonly fisherDiagonal: Map<string, number>;
/** Previous parameter values */
readonly previousParams: Map<string, number>;
/** Consolidation count */
readonly consolidationCount: number;
/** Last consolidation timestamp */
readonly lastConsolidation: Date;
/** Protected patterns (set of string identifiers) */
readonly protectedPatterns: Set<string>;
}
// ============================================================================
// Query Optimizer Implementation
// ============================================================================
/**
 * Query Optimizer for analyzing and optimizing SQL queries.
 * Implements SONA-inspired micro-learning for real-time adaptation.
 *
 * All SQL inspection is regex-based and heuristic: it never executes or fully
 * parses the query, so results are best-effort hints rather than guarantees.
 * Execution statistics are kept in memory, keyed by a normalized fingerprint.
 */
export class QueryOptimizer {
  /** Number of most-recent durations kept per fingerprint for percentile estimates. */
  private static readonly MAX_DURATION_SAMPLES = 512;
  /** Upper bound on cached suggestion lists; the oldest entry is evicted first. */
  private static readonly MAX_CACHE_ENTRIES = 1000;
  /** pgvector distance operator -> metric name. */
  private static readonly METRIC_BY_OPERATOR: Record<string, DistanceMetric> = {
    '<->': 'euclidean',
    '<=>': 'cosine',
    '<#>': 'dot',
  };

  private readonly queryStats: Map<string, QueryExecutionStats> = new Map();
  /** Sliding window of raw durations per fingerprint, used for p95/p99. */
  private readonly durationSamples: Map<string, number[]> = new Map();
  private readonly optimizationCache: Map<string, Optimization[]> = new Map();
  private readonly config: LearningConfig;

  /**
   * @param config Optional overrides; unspecified fields use the defaults below.
   */
  constructor(config?: Partial<LearningConfig>) {
    this.config = {
      enableMicroLearning: true,
      microLearningThresholdMs: 0.1, // <0.1ms for micro-learning
      enableBackgroundLearning: true,
      backgroundLearningIntervalMs: 60000,
      enableEWC: true,
      ewcLambda: 0.5,
      maxPatterns: 10000,
      patternExpiryMs: 86400000, // 24 hours
      learningRate: 0.01,
      momentum: 0.9,
      ...config,
    };
  }

  /**
   * Analyze a SQL query and return detailed analysis.
   *
   * @param sql Query text; it is inspected only, never executed.
   * @returns Heuristic analysis including the time spent producing it.
   */
  analyzeQuery(sql: string): QueryAnalysis {
    const startTime = performance.now();
    const queryType = this.parseQueryType(sql);
    const tables = this.extractTables(sql);
    const columns = this.extractColumns(sql);
    const vectorOperations = this.detectVectorOperations(sql, tables);
    const complexity = this.calculateComplexity(sql, vectorOperations);
    const indexHints = this.generateIndexHints(sql, tables, vectorOperations);
    const bottlenecks = this.detectBottlenecks(sql, tables, vectorOperations);
    const fingerprint = this.generateFingerprint(sql);
    const parseTimeMs = performance.now() - startTime;
    return {
      sql,
      queryType,
      tables,
      columns,
      vectorOperations,
      complexity,
      indexHints,
      bottlenecks,
      parseTimeMs,
      fingerprint,
    };
  }

  /**
   * Suggest optimizations for a query analysis.
   *
   * Results are cached. The cache key includes the exact SQL text as well as
   * the fingerprint, because several suggestions embed the original query and
   * its LIMIT; keying on the fingerprint alone would hand one query's text to
   * another query that merely shares its normalized shape.
   *
   * @param analysis Result of {@link analyzeQuery}.
   * @returns Suggestions; the same array instance is returned on a cache hit.
   */
  suggestOptimizations(analysis: QueryAnalysis): Optimization[] {
    const cacheKey = `${analysis.fingerprint}\u0000${analysis.sql}`;
    const cached = this.optimizationCache.get(cacheKey);
    if (cached) {
      return cached;
    }
    const optimizations: Optimization[] = [];

    // Index usage optimizations
    for (const hint of analysis.indexHints) {
      if (hint.confidence > 0.7) {
        // Schema-qualified or quoted names would otherwise yield an invalid identifier.
        const indexName = `idx_${hint.table}_${hint.column}`.replace(/\W+/g, '_');
        optimizations.push({
          type: 'index_usage',
          description: `Create ${hint.indexType} index on ${hint.table}.${hint.column}`,
          original: '',
          optimized: `CREATE INDEX ${indexName} ON ${hint.table} USING ${hint.indexType} (${hint.column})`,
          expectedImprovement: hint.expectedSpeedup * 100,
          confidence: hint.confidence,
          risk: 'low',
          autoApply: false,
        });
      }
    }

    // Vector search optimizations
    // NOTE(review): detectVectorOperations assigns searches a cost of exactly
    // 100, so this strict comparison only fires for externally built analyses.
    for (const op of analysis.vectorOperations) {
      if (op.type === 'search' && op.estimatedCost > 100) {
        const k = op.k ?? 10; // same default as extractK
        optimizations.push({
          type: 'parameter_tuning',
          description: `Tune ef_search for ${op.table}.${op.column} vector search`,
          original: '',
          optimized: `SET hnsw.ef_search = ${Math.min(k * 4, 200)}`,
          expectedImprovement: 30,
          confidence: 0.8,
          risk: 'low',
          autoApply: true,
        });
      }
    }

    // Query rewrite optimizations
    if (analysis.bottlenecks.some(b => b.type === 'full_scan')) {
      optimizations.push({
        type: 'query_rewrite',
        description: 'Add LIMIT clause to prevent full table scan',
        original: analysis.sql,
        optimized: this.appendLimit(analysis.sql, 1000),
        expectedImprovement: 50,
        confidence: 0.6,
        risk: 'medium',
        autoApply: false,
      });
    }

    // Batching optimizations for multiple inserts
    if (analysis.queryType === 'INSERT' && analysis.complexity > 0.5) {
      optimizations.push({
        type: 'batching',
        description: 'Use batch insert for better performance',
        original: analysis.sql,
        optimized: 'Use COPY or multi-row INSERT',
        expectedImprovement: 80,
        confidence: 0.9,
        risk: 'low',
        autoApply: false,
      });
    }

    // Projection pushdown (keyword match is case-insensitive, like the other checks)
    if (/SELECT\s+\*/i.test(analysis.sql)) {
      const neededColumns = analysis.columns.slice(0, 5).join(', ');
      optimizations.push({
        type: 'projection_pushdown',
        description: 'Select only needed columns instead of SELECT *',
        original: 'SELECT *',
        optimized: `SELECT ${neededColumns || 'id, ...needed_columns'}`,
        expectedImprovement: 20,
        confidence: 0.85,
        risk: 'low',
        autoApply: false,
      });
    }

    // Keep the cache bounded: Map iterates in insertion order, so the first key is the oldest.
    if (this.optimizationCache.size >= QueryOptimizer.MAX_CACHE_ENTRIES) {
      const oldest = this.optimizationCache.keys().next().value;
      if (oldest !== undefined) {
        this.optimizationCache.delete(oldest);
      }
    }
    this.optimizationCache.set(cacheKey, optimizations);
    return optimizations;
  }

  /**
   * Rewrite a query for better performance.
   *
   * Collapses whitespace, ensures a trailing semicolon, appends `LIMIT 100`
   * to a query that ends in a simple `ORDER BY <column> [ASC|DESC]` without
   * any LIMIT, and rewrites `col <-> $N` to `col <=> $N`.
   *
   * @param sql Query text.
   * @returns The rewritten query, always terminated by `;`.
   */
  rewriteQuery(sql: string): string {
    let rewritten = sql.trim().replace(/\s+/g, ' ');
    if (!rewritten.endsWith(';')) {
      rewritten += ';';
    }

    // The direction keyword is optional, so `ORDER BY id;` is covered too.
    const endsWithOrderBy = /ORDER BY\s+[^\s;]+(?:\s+(?:ASC|DESC))?\s*;$/i.test(rewritten);
    if (endsWithOrderBy && !/\bLIMIT\b/i.test(rewritten)) {
      rewritten = rewritten.replace(/;$/, ' LIMIT 100;');
    }

    // NOTE(review): swapping `<->` (L2) for `<=>` (cosine) changes the distance
    // metric, so ranking only matches for normalized vectors; kept for
    // backward compatibility. The original placeholder number is preserved so
    // parameter binding is unaffected.
    rewritten = rewritten.replace(
      /(\w+)\s*<->\s*(\$\d+)/g,
      (_match: string, column: string, placeholder: string) => `${column} <=> ${placeholder}`
    );

    // Add EXPLAIN ANALYZE for slow queries (for debugging)
    // This is disabled in production
    // rewritten = `EXPLAIN (ANALYZE, BUFFERS, FORMAT JSON) ${rewritten}`;
    return rewritten;
  }

  /**
   * Record query execution statistics for learning.
   *
   * Percentiles are computed over a sliding window of the most recent
   * durations for the query's fingerprint; the other aggregates cover every
   * recorded execution.
   *
   * @param query SQL text that was executed.
   * @param duration Execution time in milliseconds.
   * @param rows Number of rows returned.
   */
  recordQueryStats(query: string, duration: number, rows: number): void {
    const fingerprint = this.generateFingerprint(query);
    const existing = this.queryStats.get(fingerprint);
    const now = new Date();

    const samples = this.durationSamples.get(fingerprint) ?? [];
    samples.push(duration);
    if (samples.length > QueryOptimizer.MAX_DURATION_SAMPLES) {
      samples.shift();
    }
    this.durationSamples.set(fingerprint, samples);

    const executionCount = (existing?.executionCount ?? 0) + 1;
    const totalDurationMs = (existing?.totalDurationMs ?? 0) + duration;
    const totalRows = (existing?.totalRows ?? 0) + rows;

    this.queryStats.set(fingerprint, {
      fingerprint,
      sql: query,
      executionCount,
      totalDurationMs,
      avgDurationMs: totalDurationMs / executionCount,
      minDurationMs: existing ? Math.min(existing.minDurationMs, duration) : duration,
      maxDurationMs: existing ? Math.max(existing.maxDurationMs, duration) : duration,
      p95DurationMs: this.calculatePercentile(samples, 0.95),
      p99DurationMs: this.calculatePercentile(samples, 0.99),
      totalRows,
      avgRows: totalRows / executionCount,
      lastExecuted: now,
      firstExecuted: existing ? existing.firstExecuted : now,
      errorCount: existing ? existing.errorCount : 0,
    });

    // Micro-learning: immediately adapt if enabled
    if (this.config.enableMicroLearning && duration < this.config.microLearningThresholdMs) {
      this.microLearn(fingerprint, duration);
    }
  }

  /**
   * Get query statistics.
   *
   * @param fingerprint When given (and non-empty), look up that fingerprint only.
   * @returns The matching record or `undefined`; without a fingerprint, all records.
   */
  getQueryStats(fingerprint?: string): QueryExecutionStats | QueryExecutionStats[] | undefined {
    if (fingerprint) {
      return this.queryStats.get(fingerprint);
    }
    return Array.from(this.queryStats.values());
  }

  /**
   * Clear optimization cache.
   */
  clearCache(): void {
    this.optimizationCache.clear();
  }

  // Private helper methods

  /** Classify a statement by its leading keyword. */
  private parseQueryType(sql: string): QueryType {
    const normalized = sql.trim().toUpperCase();
    if (normalized.startsWith('SELECT')) return 'SELECT';
    if (normalized.startsWith('INSERT')) return 'INSERT';
    if (normalized.startsWith('UPDATE')) return 'UPDATE';
    if (normalized.startsWith('DELETE')) return 'DELETE';
    return 'UNKNOWN';
  }

  /** Append `LIMIT n` unless one is present, keeping any trailing `;` last. */
  private appendLimit(sql: string, limit: number): string {
    if (/\bLIMIT\b/i.test(sql)) {
      return sql;
    }
    const trimmed = sql.trimEnd();
    const terminated = trimmed.endsWith(';');
    const body = terminated ? trimmed.slice(0, -1).trimEnd() : trimmed;
    return `${body} LIMIT ${limit}${terminated ? ';' : ''}`;
  }

  /**
   * Collect table names from the UPDATE target, the first FROM list, every
   * JOIN and INTO. The result is de-duplicated in first-seen order.
   */
  private extractTables(sql: string): string[] {
    const tables: string[] = [];

    // Only a leading UPDATE names a table; `ON CONFLICT DO UPDATE SET` does not.
    const updateMatch = sql.match(/^\s*UPDATE\s+(?:ONLY\s+)?([^\s;]+)/i);
    if (updateMatch?.[1]) {
      tables.push(updateMatch[1]);
    }

    const fromMatch = sql.match(/FROM\s+([^\s,;]+(?:\s*,\s*[^\s,;]+)*)/i);
    if (fromMatch?.[1]) {
      for (const part of fromMatch[1].split(',')) {
        const name = part.trim().split(/\s+/)[0];
        if (name) {
          tables.push(name);
        }
      }
    }

    const joinRegex = /JOIN\s+([^\s;]+)/gi;
    let joinMatch: RegExpExecArray | null;
    while ((joinMatch = joinRegex.exec(sql)) !== null) {
      if (joinMatch[1]) {
        tables.push(joinMatch[1]);
      }
    }

    const intoMatch = sql.match(/INTO\s+([^\s(]+)/i);
    if (intoMatch?.[1]) {
      tables.push(intoMatch[1]);
    }
    return Array.from(new Set(tables));
  }

  /**
   * Return the select-list expressions with any `AS alias` removed.
   * Yields an empty list for `SELECT *` and for non-SELECT statements.
   */
  private extractColumns(sql: string): string[] {
    // [\s\S] instead of `.` so select lists spanning several lines still match.
    const selectMatch = sql.match(/SELECT\s+([\s\S]+?)\s+FROM/i);
    const selectList = selectMatch?.[1];
    if (!selectList || selectList.trim() === '*') {
      return [];
    }
    return selectList.split(',').map(c => c.trim().split(/\s+as\s+/i)[0] ?? '');
  }

  /**
   * Detect pgvector distance operators (against a literal `[...]` vector or a
   * `$N` placeholder) and vector aggregate function calls.
   */
  private detectVectorOperations(sql: string, tables: string[]): VectorOperation[] {
    const operations: VectorOperation[] = [];
    const table = tables[0] || 'unknown';

    const distanceRegex = /(\w+)\s*(<->|<=>|<#>)\s*(?:['"]?\[|\$\d+)/g;
    let distanceMatch: RegExpExecArray | null;
    while ((distanceMatch = distanceRegex.exec(sql)) !== null) {
      const column = distanceMatch[1] ?? 'unknown';
      const operator = distanceMatch[2] ?? '<->';
      operations.push({
        type: 'search',
        table,
        column,
        metric: QueryOptimizer.METRIC_BY_OPERATOR[operator] || 'euclidean',
        k: this.extractK(sql),
        estimatedCost: 100,
      });
    }

    // Detect vector aggregations
    if (/vector_avg|vector_sum|vector_centroid/i.test(sql)) {
      operations.push({
        type: 'aggregate',
        table,
        column: 'embedding',
        estimatedCost: 50,
      });
    }
    return operations;
  }

  /** K for KNN: the first LIMIT value in the query, or 10 when there is none. */
  private extractK(sql: string): number {
    const limitMatch = sql.match(/LIMIT\s+(\d+)/i);
    return limitMatch?.[1] ? parseInt(limitMatch[1], 10) : 10;
  }

  /** Sum weighted heuristics (length, vector ops, joins, subqueries, grouping), capped at 1. */
  private calculateComplexity(sql: string, vectorOps: VectorOperation[]): number {
    let complexity = 0;
    // Base complexity from length
    complexity += Math.min(sql.length / 1000, 0.3);
    // Vector operations add complexity
    complexity += vectorOps.length * 0.2;
    // Joins add complexity
    const joinCount = (sql.match(/JOIN/gi) || []).length;
    complexity += joinCount * 0.15;
    // Subqueries add complexity
    const subqueryCount = (sql.match(/\(SELECT/gi) || []).length;
    complexity += subqueryCount * 0.2;
    // Aggregations add complexity
    if (/GROUP BY|HAVING|DISTINCT/i.test(sql)) {
      complexity += 0.1;
    }
    return Math.min(complexity, 1);
  }

  /** One HNSW hint per vector search, plus one for the first WHERE-clause column. */
  private generateIndexHints(sql: string, tables: string[], vectorOps: VectorOperation[]): IndexHint[] {
    const hints: IndexHint[] = [];
    for (const op of vectorOps) {
      if (op.type === 'search') {
        hints.push({
          indexType: 'hnsw',
          table: op.table,
          column: op.column,
          confidence: 0.9,
          expectedSpeedup: 10,
        });
      }
    }
    // Check WHERE clause for potential indexes
    const whereMatch = sql.match(/WHERE\s+(\w+)\s*(=|>|<|>=|<=|LIKE)/i);
    if (whereMatch?.[1]) {
      hints.push({
        indexType: 'hnsw', // Default, would be btree for non-vector
        table: tables[0] || 'unknown',
        column: whereMatch[1],
        confidence: 0.7,
        expectedSpeedup: 5,
      });
    }
    return hints;
  }

  /** Flag likely full scans, costly un-indexed vector searches and Cartesian products. */
  private detectBottlenecks(sql: string, tables: string[], vectorOps: VectorOperation[]): Bottleneck[] {
    const bottlenecks: Bottleneck[] = [];
    // Full scan detection
    if (!/WHERE|LIMIT/i.test(sql) && /SELECT.*FROM/i.test(sql)) {
      bottlenecks.push({
        type: 'full_scan',
        description: 'Query may perform a full table scan',
        severity: 7,
        suggestion: 'Add WHERE clause or LIMIT to restrict result set',
      });
    }
    // Missing index for vector search
    // NOTE(review): detected searches cost exactly 100, so this never fires for them.
    for (const op of vectorOps) {
      if (op.estimatedCost > 100) {
        bottlenecks.push({
          type: 'missing_index',
          description: `Vector search on ${op.table}.${op.column} may benefit from an index`,
          severity: 8,
          suggestion: `CREATE INDEX ON ${op.table} USING hnsw (${op.column})`,
        });
      }
    }
    // Cartesian product detection
    if (tables.length > 1 && !/JOIN|WHERE.*=.*\./i.test(sql)) {
      bottlenecks.push({
        type: 'cartesian_product',
        description: 'Query may produce a Cartesian product',
        severity: 9,
        suggestion: 'Add JOIN conditions between tables',
      });
    }
    return bottlenecks;
  }

  /**
   * Normalize the query (whitespace, placeholders, string literals, digits,
   * case) and hash it into a `qf_<hex>` fingerprint.
   */
  private generateFingerprint(sql: string): string {
    const normalized = sql
      .replace(/\s+/g, ' ')
      .replace(/\$\d+/g, '$?')
      .replace(/'[^']*'/g, "'?'")
      .replace(/\d+/g, '?')
      .toLowerCase()
      .trim();
    // Simple 32-bit rolling hash (hash * 31 + char)
    let hash = 0;
    for (let i = 0; i < normalized.length; i++) {
      const char = normalized.charCodeAt(i);
      hash = ((hash << 5) - hash) + char;
      hash = hash & hash; // truncate to a 32-bit integer
    }
    return `qf_${Math.abs(hash).toString(16)}`;
  }

  /** Nearest-rank percentile of `values` (0 for an empty list); input is not mutated. */
  private calculatePercentile(values: number[], percentile: number): number {
    if (values.length === 0) return 0;
    const sorted = [...values].sort((a, b) => a - b);
    const index = Math.ceil(percentile * sorted.length) - 1;
    return sorted[Math.max(0, index)] ?? 0;
  }

  /** Placeholder hook for lightweight per-query adaptation; currently has no effect. */
  private microLearn(fingerprint: string, duration: number): void {
    // Micro-learning: fast, lightweight adaptation
    // In production, this would update neural network weights
    const stats = this.queryStats.get(fingerprint);
    if (stats && stats.avgDurationMs > duration * 2) {
      // Query is performing better than average - learn from this
      // This is a placeholder for actual neural adaptation
    }
  }
}
// ============================================================================
// Index Tuner Implementation
// ============================================================================
/**
* Index Tuner for analyzing workloads and suggesting index changes.
* Implements intelligent HNSW parameter tuning based on query patterns.
*/
export class IndexTuner {
private readonly indexStats: Map<string, IndexStats> = new Map();
private readonly workloadHistory: QueryHistory[] = [];
private readonly maxHistorySize: number = 10000;
/**
* Analyze workload patterns.
*/
analyzeWorkload(): WorkloadAnalysis {
const startTime = performance.now();
const now = new Date();
// Query type distribution
const queryDistribution = new Map<QueryType, number>();
const tableAccess = new Map<string, TableAccess>();
const patternCounts = new Map<string, number>();
for (const history of this.workloadHistory) {
// Count query types
const type = this.getQueryType(history.sql);
queryDistribution.set(type, (queryDistribution.get(type) || 0) + 1);
// Track table access
const tables = this.extractTables(history.sql);
for (const table of tables) {
const existing = tableAccess.get(table) || {
tableName: table,
reads: 0,
writes: 0,
vectorSearches: 0,
avgScanSize: 0,
isHot: false,
};
if (type === 'SELECT') {
tableAccess.set(table, { ...existing, reads: existing.reads + 1 });
} else if (type === 'INSERT' || type === 'UPDATE' || type === 'DELETE') {
tableAccess.set(table, { ...existing, writes: existing.writes + 1 });
}
if (this.isVectorSearch(history.sql)) {
tableAccess.set(table, { ...existing, vectorSearches: existing.vectorSearches + 1 });
}
}
// Count patterns
const fingerprint = this.generateFingerprint(history.sql);
patternCounts.set(fingerprint, (patternCounts.get(fingerprint) || 0) + 1);
}
// Calculate characteristics
const totalQueries = this.workloadHistory.length;
const readCount = queryDistribution.get('SELECT') || 0;
const writeCount = (queryDistribution.get('INSERT') || 0) +
(queryDistribution.get('UPDATE') || 0) +
(queryDistribution.get('DELETE') || 0);
const vectorSearchCount = this.workloadHistory.filter(h => this.isVectorSearch(h.sql)).length;
const characteristics: WorkloadCharacteristics = {
readWriteRatio: writeCount > 0 ? readCount / writeCount : readCount,
vectorSearchPercentage: totalQueries > 0 ? (vectorSearchCount / totalQueries) * 100 : 0,
avgComplexity: this.calculateAvgComplexity(),
peakHours: this.detectPeakHours(),
isOLTP: readCount < writeCount * 3,
isOLAP: readCount > writeCount * 10,
isHybrid: readCount >= writeCount * 3 && readCount <= writeCount * 10,
};
// Generate top patterns
const topPatterns = Array.from(patternCounts.entries())
.sort((a, b) => b[1] - a[1])
.slice(0, 10)
.map(([fingerprint, frequency]) => {
const example = this.workloadHistory.find(h =>
this.generateFingerprint(h.sql) === fingerprint
);
const avgDuration = this.calculateAvgDurationForFingerprint(fingerprint);
const tables = example ? this.extractTables(example.sql) : [];
return {
fingerprint,
example: example?.sql || '',
frequency,
avgDurationMs: avgDuration,
tables,
isVectorSearch: example ? this.isVectorSearch(example.sql) : false,
};
});
// Hot tables
const hotTables = Array.from(tableAccess.values())
.map(t => ({
...t,
isHot: t.reads + t.writes > totalQueries * 0.1,
}))
.sort((a, b) => (b.reads + b.writes) - (a.reads + a.writes))
.slice(0, 10);
// Generate recommendations
const recommendations = this.generateWorkloadRecommendations(
characteristics,
hotTables,
topPatterns
);
const durationMs = performance.now() - startTime;
return {
timestamp: now,
durationMs,
totalQueries,
queryDistribution,
topPatterns,
hotTables,
indexUsage: this.getIndexUsageSummary(),
characteristics,
recommendations,
};
}
/**
 * Suggest vector indexes based on the current workload analysis.
 *
 * Two heuristics are applied to the result of `analyzeWorkload()`:
 * - an HNSW index (cosine metric) for every table touched by a vector-search
 *   pattern that occurred more than 10 times;
 * - an IVFFlat index (euclidean metric) for every hot table with more than
 *   100 vector searches and more than 1000 reads.
 *
 * Suggestions are keyed by index name, so a table referenced by several
 * patterns yields a single suggestion (the one with the highest confidence)
 * instead of several conflicting `CREATE INDEX` statements for the same name.
 *
 * NOTE(review): the vector column is assumed to be called `embedding`; the
 * real column name is not derived from the recorded queries.
 *
 * @returns Suggestions in first-seen order, at most one per index name.
 */
suggestIndexes(): IndexSuggestion[] {
  const workload = this.analyzeWorkload();
  const byIndexName = new Map<string, IndexSuggestion>();

  // Keep only the most confident suggestion for a given index name.
  // Re-setting an existing Map key keeps its original insertion position.
  const keepBest = (candidate: IndexSuggestion): void => {
    const existing = byIndexName.get(candidate.indexName);
    if (!existing || candidate.confidence > existing.confidence) {
      byIndexName.set(candidate.indexName, candidate);
    }
  };

  // HNSW indexes for frequent vector search patterns
  for (const pattern of workload.topPatterns) {
    if (!pattern.isVectorSearch || pattern.frequency <= 10) continue;
    for (const table of pattern.tables) {
      keepBest({
        tableName: table,
        columnName: 'embedding',
        indexType: 'hnsw',
        indexName: `idx_${table}_embedding_hnsw`,
        metric: 'cosine',
        m: this.recommendM(pattern.frequency),
        efConstruction: this.recommendEfConstruction(pattern.frequency),
        confidence: Math.min(0.5 + pattern.frequency / 100, 0.95),
        expectedImprovement: this.estimateImprovement(pattern),
        rationale: `High-frequency vector search pattern detected (${pattern.frequency} queries)`,
        createSql: this.generateCreateIndexSql(table, 'embedding', 'hnsw', 'cosine'),
      });
    }
  }

  // IVFFlat for heavily read tables (read count is used as a proxy for size)
  for (const table of workload.hotTables) {
    if (table.vectorSearches <= 100 || table.reads <= 1000) continue;
    keepBest({
      tableName: table.tableName,
      columnName: 'embedding',
      indexType: 'ivfflat',
      // Must match the name emitted by generateCreateIndexSql
      // (idx_<table>_<column>_<indexType>), otherwise the suggestion points
      // at an index that the accompanying SQL never creates.
      indexName: `idx_${table.tableName}_embedding_ivfflat`,
      metric: 'euclidean',
      lists: this.recommendIvfLists(table.reads),
      confidence: 0.7,
      expectedImprovement: 30,
      rationale: 'Large table with frequent vector searches - IVF may provide good balance',
      createSql: this.generateCreateIndexSql(table.tableName, 'embedding', 'ivfflat', 'euclidean'),
    });
  }

  return Array.from(byIndexName.values());
}
/**
 * Auto-tune HNSW parameters for a table from its recorded vector searches.
 *
 * The optimisation target is chosen from two signals taken from the workload
 * history: the number of matching queries per hour of recorded history
 * (more than 100 selects `speed`) and the average `LIMIT` of those queries
 * (more than 50 selects `recall`); anything else is `balanced`. Each target
 * maps to a fixed preset for `m`, `efConstruction`, estimated recall and
 * estimated QPS, while `efSearch` scales with the average LIMIT.
 *
 * `efSearch` is rounded up to a whole number: the average LIMIT is usually
 * fractional, and a search-list size has to be an integer.
 *
 * @param tableName - Table whose vector-search queries should be analysed.
 * @returns Tuned parameters, or balanced defaults with confidence 0.5 when no
 *   vector search against the table has been recorded.
 */
tuneHNSW(tableName: string): HNSWParams {
  // Vector searches in the history that reference this table
  const tableQueries = this.workloadHistory.filter(h =>
    this.extractTables(h.sql).includes(tableName) && this.isVectorSearch(h.sql)
  );
  if (tableQueries.length === 0) {
    // Nothing to learn from: return default balanced parameters
    return {
      m: 16,
      efConstruction: 64,
      efSearch: 40,
      optimizedFor: 'balanced',
      confidence: 0.5,
      estimatedRecall: 0.95,
      estimatedQps: 1000,
    };
  }

  // Average K (LIMIT) requested by the queries
  const avgK = tableQueries.reduce((sum, q) => sum + this.extractK(q.sql), 0) / tableQueries.length;
  // Query frequency, measured per hour of recorded history (at least one hour)
  const queriesPerHour = tableQueries.length / Math.max(1, this.getWorkloadDurationHours());

  // Determine optimization target
  let optimizedFor: 'recall' | 'speed' | 'balanced';
  if (queriesPerHour > 100) {
    optimizedFor = 'speed';
  } else if (avgK > 50) {
    optimizedFor = 'recall';
  } else {
    optimizedFor = 'balanced';
  }

  // efSearch = max(floor, avgK * factor), rounded up to an integer
  const efSearchFor = (floor: number, factor: number): number =>
    Math.ceil(Math.max(floor, avgK * factor));

  let m: number, efConstruction: number, efSearch: number;
  let estimatedRecall: number, estimatedQps: number;
  switch (optimizedFor) {
    case 'speed':
      m = 12;
      efConstruction = 40;
      efSearch = efSearchFor(20, 2);
      estimatedRecall = 0.90;
      estimatedQps = 2000;
      break;
    case 'recall':
      m = 24;
      efConstruction = 200;
      efSearch = efSearchFor(100, 4);
      estimatedRecall = 0.99;
      estimatedQps = 500;
      break;
    default: // balanced
      m = 16;
      efConstruction = 100;
      efSearch = efSearchFor(40, 3);
      estimatedRecall = 0.95;
      estimatedQps = 1000;
  }

  return {
    m,
    efConstruction,
    efSearch,
    optimizedFor,
    // Confidence grows with the number of observed queries, capped at 0.95
    confidence: Math.min(0.6 + tableQueries.length / 500, 0.95),
    estimatedRecall,
    estimatedQps,
  };
}
/**
 * Return a snapshot of the recorded index statistics.
 *
 * @returns A new Map holding the current entries; adding or removing keys on
 *   it does not affect the optimizer's own map (the stats objects themselves
 *   are shared, not cloned).
 */
getIndexStats(): Map<string, IndexStats> {
  const snapshot = new Map(this.indexStats);
  return snapshot;
}
/**
 * Store the statistics for one index, replacing any previous entry.
 *
 * @param indexName - Key under which the statistics are stored.
 * @param stats - Statistics to associate with that key.
 */
updateIndexStats(indexName: string, stats: IndexStats): void {
  const registry = this.indexStats;
  registry.set(indexName, stats);
}
/**
 * Append one executed query to the workload history.
 *
 * The history is bounded: once it holds more than `maxHistorySize` entries,
 * the oldest ones are dropped from the front.
 *
 * @param history - The query record to append.
 */
recordQuery(history: QueryHistory): void {
  const log = this.workloadHistory;
  log.push(history);

  // Number of entries beyond the configured capacity
  const excess = log.length - this.maxHistorySize;
  if (excess > 0) {
    log.splice(0, excess);
  }
}
// Private helper methods
/**
 * Classify a SQL statement by its leading keyword.
 *
 * The statement is trimmed and upper-cased, then compared against the four
 * supported keywords. Anything else is reported as 'UNKNOWN'.
 *
 * @param sql - Raw SQL text.
 * @returns The detected query type.
 */
private getQueryType(sql: string): QueryType {
  const head = sql.trim().toUpperCase();
  const keywords: QueryType[] = ['SELECT', 'INSERT', 'UPDATE', 'DELETE'];
  for (const keyword of keywords) {
    if (head.startsWith(keyword)) {
      return keyword;
    }
  }
  return 'UNKNOWN';
}
/**
 * Extract the table names that follow FROM or JOIN keywords in a statement.
 *
 * Every FROM/JOIN occurrence is inspected, so tables referenced inside
 * subqueries are included. A name ends at whitespace, a comma, a semicolon or
 * a parenthesis, so trailing punctuation is never part of the result and a
 * `FROM (` / `JOIN (` that opens a subquery contributes no name of its own.
 * The keywords must start at a word boundary.
 *
 * This is a lightweight heuristic, not a SQL parser: only the first table of a
 * comma-separated FROM list is picked up, and any other `FROM <word>`
 * construct is treated as a table reference.
 *
 * @param sql - Raw SQL text.
 * @returns Distinct table names in order of first appearance.
 */
private extractTables(sql: string): string[] {
  const tableRef = /\b(?:FROM|JOIN)\s+([^\s,;()]+)/gi;
  const tables = new Set<string>();
  let match: RegExpExecArray | null;
  while ((match = tableRef.exec(sql)) !== null) {
    tables.add(match[1]);
  }
  return Array.from(tables);
}
/**
 * Tell whether a statement contains one of the vector distance operators
 * `<->`, `<=>` or `<#>`.
 *
 * @param sql - Raw SQL text.
 * @returns True when at least one of the operators occurs in the text.
 */
private isVectorSearch(sql: string): boolean {
  const distanceOperators = ['<->', '<=>', '<#>'];
  return distanceOperators.some(operator => sql.indexOf(operator) !== -1);
}
/**
 * Compute a fingerprint that groups queries differing only in literal values.
 *
 * Normalisation, in order: collapse whitespace runs to one space, replace
 * `$n` placeholders with `$?`, replace single-quoted literals with `'?'`,
 * replace remaining digit runs with `?`, lower-case and trim. The normalised
 * text is then folded into a 32-bit hash (hash * 31 + char code per
 * character) and rendered as `qf_<hex of the absolute value>`.
 *
 * @param sql - Raw SQL text.
 * @returns Fingerprint string prefixed with `qf_`.
 */
private generateFingerprint(sql: string): string {
  const collapsed = sql.replace(/\s+/g, ' ');
  const withoutParams = collapsed.replace(/\$\d+/g, '$?');
  const withoutStrings = withoutParams.replace(/'[^']*'/g, "'?'");
  const withoutNumbers = withoutStrings.replace(/\d+/g, '?');
  const canonical = withoutNumbers.toLowerCase().trim();

  let hash = 0;
  for (let index = 0; index < canonical.length; index += 1) {
    // (hash << 5) - hash is hash * 31; `| 0` truncates to a signed 32-bit int
    hash = (((hash << 5) - hash) + canonical.charCodeAt(index)) | 0;
  }
  return `qf_${Math.abs(hash).toString(16)}`;
}
/**
 * Estimate the average structural complexity of the recorded queries.
 *
 * Each query scores 0.15 per occurrence of `JOIN` and 0.2 per occurrence of
 * `(SELECT` (both case-insensitive); the scores are averaged over the whole
 * history and the average is capped at 1.
 *
 * @returns A value between 0 and 1; 0 when the history is empty.
 */
private calculateAvgComplexity(): number {
  const sampleCount = this.workloadHistory.length;
  if (sampleCount === 0) return 0;

  const occurrences = (text: string, pattern: RegExp): number =>
    (text.match(pattern) || []).length;

  const total = this.workloadHistory.reduce((acc, entry) => {
    const joins = occurrences(entry.sql, /JOIN/gi);
    const subqueries = occurrences(entry.sql, /\(SELECT/gi);
    return acc + (joins * 0.15 + subqueries * 0.2);
  }, 0);

  return Math.min(total / sampleCount, 1);
}
/**
 * Find the hours of the day in which query traffic peaks.
 *
 * Queries are bucketed by `timestamp.getHours()` (local time of the runtime).
 * An hour counts as a peak when it holds at least 70% of the busiest hour's
 * query count.
 *
 * @returns Peak hours (0-23) in ascending order; an empty array when no
 *   queries have been recorded, because with zero traffic every hour would
 *   otherwise meet the zero threshold and be reported as a peak.
 */
private detectPeakHours(): number[] {
  const hourCounts: number[] = new Array(24).fill(0);
  for (const entry of this.workloadHistory) {
    const hour = entry.timestamp.getHours();
    hourCounts[hour] = (hourCounts[hour] ?? 0) + 1;
  }

  const maxCount = Math.max(...hourCounts);
  if (maxCount === 0) {
    return [];
  }

  const threshold = maxCount * 0.7;
  const peakHours: number[] = [];
  hourCounts.forEach((count, hour) => {
    if (count >= threshold) {
      peakHours.push(hour);
    }
  });
  return peakHours;
}
/**
 * Average the recorded duration of all queries sharing a fingerprint.
 *
 * @param fingerprint - Fingerprint as produced by `generateFingerprint`.
 * @returns Mean `durationMs` of the matching history entries, or 0 when no
 *   entry matches.
 */
private calculateAvgDurationForFingerprint(fingerprint: string): number {
  let totalMs = 0;
  let hits = 0;
  for (const entry of this.workloadHistory) {
    if (this.generateFingerprint(entry.sql) !== fingerprint) continue;
    totalMs += entry.durationMs;
    hits += 1;
  }
  return hits === 0 ? 0 : totalMs / hits;
}
/**
 * Build one usage-summary row per recorded index.
 *
 * The table name is taken from the second underscore-separated segment of
 * `stats.indexName` ('unknown' when that segment is missing or empty).
 *
 * NOTE(review): scanCount, tupleReads and tupleFetches are random placeholder
 * values, so they differ on every call and carry no real information. They
 * are meant to be replaced by figures from pg_stat_user_indexes.
 * `isUnderutilized` and `recommendation` are likewise fixed values.
 *
 * @returns Summary rows in the iteration order of the index statistics map.
 */
private getIndexUsageSummary(): IndexUsageSummary[] {
  const summaries: IndexUsageSummary[] = [];
  this.indexStats.forEach((stats, indexName) => {
    const tableSegment = stats.indexName.split('_')[1];
    summaries.push({
      indexName,
      tableName: tableSegment || 'unknown',
      indexType: stats.indexType,
      scanCount: Math.floor(Math.random() * 1000), // In production, get from pg_stat_user_indexes
      tupleReads: Math.floor(Math.random() * 10000),
      tupleFetches: Math.floor(Math.random() * 5000),
      isUnderutilized: false,
      recommendation: 'keep',
    });
  });
  return summaries;
}
/**
 * Turn workload characteristics into a list of tuning recommendations.
 *
 * Rules, evaluated in this order:
 * 1. more than 50% vector searches: recommend HNSW indexes (priority 9);
 * 2. OLAP workload: recommend materialized views (priority 7);
 * 3. each hot table with at least one vector search: recommend tuning
 *    ef_search (priority 8).
 *
 * @param characteristics - Aggregate workload characteristics.
 * @param hotTables - Most accessed tables, already flagged with `isHot`.
 * @param topPatterns - Most frequent query patterns (currently not consulted).
 * @returns Recommendations in rule order.
 */
private generateWorkloadRecommendations(
  characteristics: WorkloadCharacteristics,
  hotTables: TableAccess[],
  topPatterns: QueryPattern[]
): WorkloadRecommendation[] {
  const result: WorkloadRecommendation[] = [];

  // Rule 1: vector-search dominated workload
  const vectorHeavy = characteristics.vectorSearchPercentage > 50;
  if (vectorHeavy) {
    result.push({
      type: 'create_index',
      priority: 9,
      description: 'High vector search workload - ensure HNSW indexes on all vector columns',
      estimatedImpact: 'Up to 100x improvement in search latency',
    });
  }

  // Rule 2: analytical workload
  if (characteristics.isOLAP) {
    result.push({
      type: 'materialize_view',
      priority: 7,
      description: 'OLAP workload detected - consider materialized views for common aggregations',
      estimatedImpact: 'Reduce query time by 80% for repeated analytics',
    });
  }

  // Rule 3: hot tables that serve vector searches
  hotTables
    .filter(table => table.vectorSearches > 0 && table.isHot)
    .forEach(table => {
      result.push({
        type: 'tune_parameter',
        priority: 8,
        description: `Table ${table.tableName} is hot - tune ef_search for optimal performance`,
        estimatedImpact: '20-50% improvement in search latency',
      });
    });

  return result;
}
/**
 * Pick the HNSW `m` parameter from how often a pattern occurs.
 *
 * @param frequency - Number of occurrences of the query pattern.
 * @returns 24 above 100 occurrences, 16 above 50, otherwise 12.
 */
private recommendM(frequency: number): number {
  return frequency > 100 ? 24 : frequency > 50 ? 16 : 12;
}
/**
 * Pick the HNSW `efConstruction` parameter from how often a pattern occurs.
 *
 * @param frequency - Number of occurrences of the query pattern.
 * @returns 200 above 100 occurrences, 100 above 50, otherwise 64.
 */
private recommendEfConstruction(frequency: number): number {
  // [exclusive lower bound, efConstruction], highest tier first
  const tiers: Array<[number, number]> = [
    [100, 200],
    [50, 100],
  ];
  for (const [bound, value] of tiers) {
    if (frequency > bound) return value;
  }
  return 64;
}
/**
 * Recommend the number of IVF lists for a table.
 *
 * Uses the square root of the row count, rounded to the nearest whole number
 * (a list count has to be an integer) and clamped to the range 10-1000.
 *
 * @param rowCount - Approximate number of rows in the table.
 * @returns An integer between 10 and 1000; 10 when `rowCount` is NaN, zero or
 *   negative.
 */
private recommendIvfLists(rowCount: number): number {
  const minLists = 10;
  const maxLists = 1000;
  // sqrt of a negative number or NaN would otherwise propagate NaN
  if (Number.isNaN(rowCount) || rowCount <= 0) {
    return minLists;
  }
  const lists = Math.round(Math.sqrt(rowCount));
  return Math.min(Math.max(lists, minLists), maxLists);
}
/**
 * Estimate the improvement value for indexing a pattern, based on its
 * current average duration.
 *
 * @param pattern - Query pattern with its average duration in milliseconds.
 * @returns 90 above 100 ms, 70 above 50 ms, 50 above 20 ms, otherwise 30.
 */
private estimateImprovement(pattern: QueryPattern): number {
  const latencyMs = pattern.avgDurationMs;
  // [exclusive latency bound in ms, estimated improvement], slowest first
  const tiers: Array<[number, number]> = [
    [100, 90],
    [50, 70],
    [20, 50],
  ];
  for (const [bound, gain] of tiers) {
    if (latencyMs > bound) return gain;
  }
  return 30;
}
/**
 * Build the `CREATE INDEX` statement for a vector index.
 *
 * The index is named `idx_<table>_<column>_<indexType>`. The operator class
 * follows the metric: cosine uses `vector_cosine_ops`, euclidean uses
 * `vector_l2_ops`, and every other metric uses `vector_ip_ops`.
 *
 * NOTE(review): identifiers are interpolated as-is, without quoting.
 *
 * @param tableName - Table to index.
 * @param columnName - Vector column to index.
 * @param indexType - Index access method placed after `USING`.
 * @param metric - Distance metric that selects the operator class.
 * @returns The SQL statement, without a trailing semicolon.
 */
private generateCreateIndexSql(
  tableName: string,
  columnName: string,
  indexType: VectorIndexType,
  metric: DistanceMetric
): string {
  let opsClass: string;
  switch (metric) {
    case 'cosine':
      opsClass = 'vector_cosine_ops';
      break;
    case 'euclidean':
      opsClass = 'vector_l2_ops';
      break;
    default:
      opsClass = 'vector_ip_ops';
  }
  const indexName = `idx_${tableName}_${columnName}_${indexType}`;
  return `CREATE INDEX ${indexName} ON ${tableName} USING ${indexType} (${columnName} ${opsClass})`;
}
/**
 * Read the result size (K) of a query from its first numeric `LIMIT` clause.
 *
 * @param sql - Raw SQL text.
 * @returns The LIMIT value, or 10 when no `LIMIT <digits>` is present
 *   (for example when the limit is a bind parameter).
 */
private extractK(sql: string): number {
  const found = /LIMIT\s+(\d+)/i.exec(sql);
  if (!found) {
    return 10;
  }
  return parseInt(found[1], 10);
}
/**
 * Measure the time span covered by the workload history, in hours.
 *
 * The span is the difference between the timestamps of the last and the first
 * history entry, so it presumes entries are stored in chronological order.
 *
 * @returns The span in hours, never less than 1; 1 when fewer than two
 *   entries are recorded.
 */
private getWorkloadDurationHours(): number {
  const entries = this.workloadHistory;
  if (entries.length < 2) return 1;

  const msPerHour = 1000 * 60 * 60;
  const startMs = entries[0].timestamp.getTime();
  const endMs = entries[entries.length - 1].timestamp.getTime();
  const spanHours = (endMs - startMs) / msPerHour;
  return Math.max(1, spanHours);
}
}
// ============================================================================
// Pattern Recognizer Implementation
// ============================================================================
/**
* Pattern Recognizer for learning from query history and detecting patterns.
* Implements anomaly detection and query prediction.
*/
export class PatternRecognizer {
/** Learned patterns, stored under each pattern's `id`. */
private readonly patterns: Map<string, Pattern> = new Map();
/** Collected anomalies. NOTE(review): no use is visible in this part of the class; confirm. */
private readonly anomalyHistory: Anomaly[] = [];
/** Maps a query to the queries expected to follow it; read by `predictQueries`. */
private readonly querySequences: Map<string, string[]> = new Map();
/** Effective learning configuration: built-in defaults overridden by constructor options. */
private readonly config: LearningConfig;
/**
 * Create a recognizer with the default learning configuration, optionally
 * overridden field by field.
 *
 * @param config - Overrides applied on top of the defaults; every key present
 *   on the object replaces the corresponding default.
 */
constructor(config?: Partial<LearningConfig>) {
  const defaults = {
    enableMicroLearning: true,
    microLearningThresholdMs: 0.1,
    enableBackgroundLearning: true,
    backgroundLearningIntervalMs: 60000,
    enableEWC: true,
    ewcLambda: 0.5,
    maxPatterns: 10000,
    patternExpiryMs: 86400000,
    learningRate: 0.01,
    momentum: 0.9,
  };
  this.config = { ...defaults, ...config };
}
/**
 * Learn patterns from a batch of executed queries.
 *
 * Steps: group the queries by fingerprint, analyse each group and store the
 * resulting pattern (if any) under its id, detect sequential patterns over
 * the whole batch, then expire old patterns. All steps share one timestamp
 * taken at the start of the call.
 *
 * @param queries - Query history entries to learn from.
 */
learnFromHistory(queries: QueryHistory[]): void {
  const now = new Date();

  // Bucket the batch by query fingerprint
  const byFingerprint = new Map<string, QueryHistory[]>();
  queries.forEach(entry => {
    const key = this.generateFingerprint(entry.sql);
    const bucket = byFingerprint.get(key);
    if (bucket) {
      bucket.push(entry);
    } else {
      byFingerprint.set(key, [entry]);
    }
  });

  // Derive one pattern per bucket; a later pattern with the same id overwrites
  for (const [fingerprint, group] of Array.from(byFingerprint.entries())) {
    const learned = this.analyzeGroup(fingerprint, group, now);
    if (learned) {
      this.patterns.set(learned.id, learned);
    }
  }

  this.detectSequentialPatterns(queries);
  this.expirePatterns(now);
}
/**
 * List the learned patterns the recognizer is reasonably sure about.
 *
 * @returns Patterns with confidence above 0.5, most frequently seen first.
 */
detectPatterns(): Pattern[] {
  const confident: Pattern[] = [];
  this.patterns.forEach(candidate => {
    if (candidate.confidence > 0.5) {
      confident.push(candidate);
    }
  });
  confident.sort((left, right) => right.occurrences - left.occurrences);
  return confident;
}
/**
 * Predict the queries most likely to be issued next.
 *
 * Two sources contribute candidates:
 * - queries recorded as followers of the most recent query in the context
 *   (score 0.8 each);
 * - the first example of every pattern whose temporal peak hours include the
 *   context's hour (score = pattern confidence * 0.6).
 *
 * Each distinct query is returned once, ranked by its best score. Without
 * this, a query repeated in the follower list or matched by both sources
 * could occupy several of the ten result slots.
 *
 * @param context - Recent queries and the timestamp to predict for.
 * @returns Up to 10 distinct queries, highest score first; ties keep the
 *   order in which the candidates were first produced.
 */
predictQueries(context: Context): string[] {
  // Best score seen per query; Map preserves first-insertion order
  const bestScore = new Map<string, number>();
  const consider = (query: string, score: number): void => {
    const previous = bestScore.get(query);
    if (previous === undefined || score > previous) {
      bestScore.set(query, score);
    }
  };

  // Use recent query sequence for prediction
  if (context.recentQueries.length > 0) {
    const lastQuery = context.recentQueries[context.recentQueries.length - 1];
    const followers = this.querySequences.get(lastQuery) || [];
    for (const nextQuery of followers) {
      consider(nextQuery, 0.8);
    }
  }

  // Use time-based patterns (hour in the runtime's local time)
  const hour = context.timestamp.getHours();
  this.patterns.forEach(pattern => {
    if (pattern.temporal?.peakHours.includes(hour) && pattern.examples.length > 0) {
      consider(pattern.examples[0], pattern.confidence * 0.6);
    }
  });

  // Sort by score and return top 10
  return Array.from(bestScore.entries())
    .sort((a, b) => b[1] - a[1])
    .slice(0, 10)
    .map(([query]) => query);
}
/**
* Detect anomalies in queries.
*/
detectAnomalies(queries: string[]): Anomaly[] {
const anomalies: Anomaly[] = [];
const now = new Date();
for (const query of queries) {
const fingerprint = this.generateFingerprint(query);
const pattern = this.patterns.get(`pattern_${fingerprint}`);
if (pattern) {
// Check for performance degradation
const currentPerf = pattern.performance;
if (currentPerf.responseTrend === 'degrading') {
anomalies.push({
id: `anomaly_${Date.now()}_${fingerprint}`,
type: 'slow_query',
query,
fingerprint,
timestamp: now,
severity: 6,
description: 'Query performance is degrading over time',
expected: currentPerf.percentiles.p50,
actual: currentPerf.percentiles.p95,
deviation: (currentPerf.percentiles.p95 - currentPerf.percentiles.p50) / currentPerf.percentiles.p50,
possibleCauses: [
'Table growth without index optimization',
'Increased concurrent load',
'Data distribution changes',
],
recommendations: [
'Analyze query execution plan',
'Check index usage statistics',
'Consider query optimization',
],
});
}
// Check for unusual patterns
if (currentPerf.hasOutliers && currentPerf.varianceCoefficient > 1) {
anomalies.push({
id: `anomaly_${Date.now()}_${fingerprint}_variance`,
type: 'unusual_pattern',
query,
fingerprint,
timestamp: now,
severi