UNPKG

rag-cli-tester

Version:

A lightweight CLI tool for testing RAG (Retrieval-Augmented Generation) systems with different embedding combinations

github.com/Syed-Ayan-Ali/rag-testing-framework-cli

Syed-Ayan-Ali/rag-cli-tester

421 lines • 18.8 kB

JavaScript

"use strict";
// CommonJS export object. Falls back to a throwaway object when no `exports`
// binding exists (for example when the file is evaluated as an ES module), so
// that merely loading the file never throws a ReferenceError.
const moduleExports = typeof exports === "undefined" ? {} : exports;
Object.defineProperty(moduleExports, "__esModule", { value: true });
moduleExports.SQLMetric = void 0;

/** Keywords made only of letters and spaces; everything else is treated as an operator. */
const WORD_KEYWORD = /^[a-z][a-z ]*$/i;

/** Cache of compiled whole-word patterns, keyed by keyword text. */
const keywordPatternCache = new Map();

/**
 * Escapes regular-expression metacharacters in `text`.
 * @param {string} text
 * @returns {string}
 */
function escapeRegExp(text) {
    return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * Returns a case-insensitive pattern matching `keyword` as a whole word.
 * Multi-word keywords ("group by") accept any run of whitespace between words.
 * The pattern has no `g` flag, so `.test()` is stateless.
 * @param {string} keyword
 * @returns {RegExp}
 */
function keywordPattern(keyword) {
    let pattern = keywordPatternCache.get(keyword);
    if (pattern === undefined) {
        const body = keyword.trim().split(/\s+/).map(escapeRegExp).join('\\s+');
        pattern = new RegExp(`\\b${body}\\b`, 'i');
        keywordPatternCache.set(keyword, pattern);
    }
    return pattern;
}

/**
 * Tests whether `sql` contains `keyword` as a whole word (not as a substring
 * of a longer identifier such as "order" for "or" or "admin" for "min").
 * @param {string} sql
 * @param {string} keyword
 * @returns {boolean}
 */
function hasKeyword(sql, keyword) {
    return keywordPattern(keyword).test(sql);
}

/**
 * Returns the subset of `keywords` present in `sql`, in `keywords` order.
 * Alphabetic keywords are matched as whole words; symbol keywords are matched
 * as whole comparison-operator tokens, so ">=" does not also count as ">" or "=".
 * @param {string} sql
 * @param {Iterable<string>} keywords
 * @returns {Set<string>}
 */
function findKeywords(sql, keywords) {
    const found = new Set();
    const operators = new Set(sql.match(/<=|>=|!=|<>|=|<|>/g) || []);
    for (const keyword of keywords) {
        const present = WORD_KEYWORD.test(keyword)
            ? hasKeyword(sql, keyword)
            : operators.has(keyword);
        if (present) {
            found.add(keyword);
        }
    }
    return found;
}

/**
 * Jaccard similarity of two sets: |A ∩ B| / |A ∪ B|.
 * Two empty sets count as identical (1.0); exactly one empty set scores 0.0.
 * @param {Set<string>} expectedSet
 * @param {Set<string>} actualSet
 * @returns {number} value in [0, 1]
 */
function jaccardSimilarity(expectedSet, actualSet) {
    if (expectedSet.size === 0 && actualSet.size === 0) {
        return 1.0;
    }
    if (expectedSet.size === 0 || actualSet.size === 0) {
        return 0.0;
    }
    let shared = 0;
    for (const item of expectedSet) {
        if (actualSet.has(item)) {
            shared++;
        }
    }
    return shared / (expectedSet.size + actualSet.size - shared);
}

/**
 * Checks that every ")" closes an earlier "(" and that none is left open.
 * Comparing counts alone would wrongly accept text such as ")(".
 * @param {string} sql
 * @returns {boolean}
 */
function hasBalancedParentheses(sql) {
    let depth = 0;
    for (const ch of sql) {
        if (ch === '(') {
            depth++;
        }
        else if (ch === ')') {
            depth--;
            if (depth < 0) {
                return false;
            }
        }
    }
    return depth === 0;
}

/**
 * Checks that single and double quote characters each occur an even number of times.
 * @param {string} sql
 * @returns {boolean}
 */
function hasBalancedQuotes(sql) {
    const singleQuotes = (sql.match(/'/g) || []).length;
    const doubleQuotes = (sql.match(/"/g) || []).length;
    return singleQuotes % 2 === 0 && doubleQuotes % 2 === 0;
}

/**
 * Splits `clause` on commas that are not nested inside parentheses, so that
 * "a, coalesce(b, c)" yields two parts rather than three.
 * @param {string} clause
 * @returns {string[]}
 */
function splitTopLevelCommas(clause) {
    const parts = [];
    let depth = 0;
    let start = 0;
    for (let i = 0; i < clause.length; i++) {
        const ch = clause[i];
        if (ch === '(') {
            depth++;
        }
        else if (ch === ')') {
            depth = Math.max(0, depth - 1);
        }
        else if (ch === ',' && depth === 0) {
            parts.push(clause.slice(start, i));
            start = i + 1;
        }
    }
    parts.push(clause.slice(start));
    return parts;
}

/**
 * Collects capture group 1 of every match of `pattern` in `text`.
 * @param {string} text
 * @param {RegExp} pattern a freshly created global pattern with one capture group
 * @returns {string[]}
 */
function collectFirstGroups(text, pattern) {
    const captures = [];
    let match = pattern.exec(text);
    while (match !== null) {
        captures.push(match[1]);
        match = pattern.exec(text);
    }
    return captures;
}

/**
 * Coerces a possibly missing value to a lower-cased string.
 * @param {*} value
 * @returns {string} "" for null/undefined, otherwise `String(value).toLowerCase()`
 */
function normalizeText(value) {
    return value == null ? '' : String(value).toLowerCase();
}

/**
 * Heuristic text-to-SQL metric. Every sub-score is a number in [0, 1]
 * computed from lexical features of the expected and generated SQL text;
 * nothing is parsed or executed.
 */
class SQLMetric {
    constructor() {
        this.sqlKeywords = new Set([
            // SQL commands
            'select', 'from', 'where', 'join', 'group by', 'order by', 'having',
            'insert', 'update', 'delete', 'create', 'alter', 'drop', 'index',
            'union', 'intersect', 'except', 'with', 'cte', 'case', 'when', 'then',
            // Aggregations
            'count', 'sum', 'avg', 'min', 'max', 'distinct', 'top', 'limit',
            // Joins
            'inner join', 'left join', 'right join', 'full join', 'cross join',
            'natural join', 'using', 'on',
            // Conditions
            'and', 'or', 'not', 'in', 'exists', 'between', 'like', 'ilike',
            'is null', 'is not null', '>', '<', '>=', '<=', '=', '!='
        ]);
        this.tableKeywords = new Set([
            'table', 'schema', 'database', 'view', 'temp', 'temporary',
            'partition', 'index', 'constraint', 'foreign key', 'primary key',
            'unique', 'check', 'default'
        ]);
        this.businessTerms = new Set([
            // Company/Performance terms
            'company', 'corporation', 'enterprise', 'business', 'firm', 'organization',
            'performance', 'revenue', 'profit', 'income', 'earnings', 'growth',
            'market', 'share', 'stock', 'equity', 'valuation', 'market cap',
            // Financial metrics
            'roi', 'roa', 'roe', 'ebitda', 'ebit', 'net income', 'gross profit',
            'operating income', 'cash flow', 'assets', 'liabilities', 'debt',
            // Market terms
            'sector', 'industry', 'market', 'exchange', 'nasdaq', 'nyse', 'ftse',
            'dow jones', 's&p', 'index', 'benchmark', 'trend', 'volatility'
        ]);
    }

    /**
     * Scores a generated SQL query against the expected one.
     *
     * @param {string} expected reference SQL; null/undefined is treated as ""
     * @param {string} actual generated SQL; null/undefined is treated as ""
     * @param {number} similarity embedding similarity, mapped from [-1, 1] to
     *   [0, 1]; a missing or non-finite value is treated as 0
     * @returns {object} `overallScore`, the individual sub-scores and `confidence`
     */
    calculate(expected, actual, similarity) {
        const expectedLower = normalizeText(expected);
        const actualLower = normalizeText(actual);

        const sqlAccuracy = this.calculateSQLAccuracy(expectedLower, actualLower);
        const tableRelevance = this.calculateTableRelevance(expectedLower, actualLower);
        const semanticUnderstanding = this.calculateSemanticUnderstanding(expectedLower, actualLower);
        const queryCompleteness = this.calculateQueryCompleteness(actualLower);
        const syntaxCorrectness = this.calculateSyntaxCorrectness(actualLower);

        // Embedding similarity acts as a confidence factor. Guard against
        // NaN/undefined so the confidence value cannot become NaN.
        const numericSimilarity = Number.isFinite(similarity) ? similarity : 0;
        const similarityFactor = Math.min(1, Math.max(0, (numericSimilarity + 1) / 2));

        const tablePresence = this.calculateTablePresence(expectedLower, actualLower);
        const columnPresence = this.calculateColumnPresence(expectedLower, actualLower);
        const joinAggregation = this.calculateJoinAggregation(actualLower);
        const queryParsability = this.calculateQueryParsability(actualLower);
        const queryDifference = this.calculateQueryDifference(expectedLower, actualLower);
        const semanticEquivalence = this.calculateSemanticEquivalence(expectedLower, actualLower);

        // NOTE: these weights sum to 1.10, which is why the total is clamped
        // to [0, 1] in the returned object.
        const overallScore = (sqlAccuracy * 0.20 +
            tableRelevance * 0.15 +
            semanticUnderstanding * 0.15 +
            queryCompleteness * 0.10 +
            syntaxCorrectness * 0.05 +
            tablePresence * 0.15 +
            columnPresence * 0.10 +
            joinAggregation * 0.05 +
            queryParsability * 0.03 +
            (1 - queryDifference) * 0.02 + // Inverted: a lower difference is better
            semanticEquivalence * 0.10);

        const confidence = this.calculateConfidence(expectedLower, actualLower, similarityFactor, queryCompleteness, syntaxCorrectness);

        return {
            overallScore: Math.min(1.0, Math.max(0, overallScore)),
            sqlAccuracy,
            tableRelevance,
            semanticUnderstanding,
            queryCompleteness,
            syntaxCorrectness,
            tablePresence,
            columnPresence,
            joinAggregation,
            queryParsability,
            queryDifference,
            semanticEquivalence,
            confidence
        };
    }

    /** Jaccard similarity of the SQL components (keywords and FROM tables) of both queries. */
    calculateSQLAccuracy(expected, actual) {
        return jaccardSimilarity(this.extractSQLComponents(expected), this.extractSQLComponents(actual));
    }

    /** Jaccard similarity of the table-related references found in both texts. */
    calculateTableRelevance(expected, actual) {
        return jaccardSimilarity(this.extractTableReferences(expected), this.extractTableReferences(actual));
    }

    /** Jaccard similarity of the business terms found in both texts. */
    calculateSemanticUnderstanding(expected, actual) {
        return jaccardSimilarity(this.extractBusinessTerms(expected), this.extractBusinessTerms(actual));
    }

    /**
     * Fraction of the essential clauses (SELECT and FROM) present in `sql`.
     * @param {string} sql
     * @returns {number} 0, 0.5 or 1
     */
    calculateQueryCompleteness(sql) {
        const essentialComponents = ['select', 'from'];
        const essentialCount = essentialComponents.filter(comp => hasKeyword(sql, comp)).length;
        return essentialCount / essentialComponents.length;
    }

    /**
     * Fraction of three basic checks passed: balanced parentheses, balanced
     * quotes, and presence of at least one known SQL keyword.
     * @param {string} sql
     * @returns {number} value in [0, 1]
     */
    calculateSyntaxCorrectness(sql) {
        const checks = [
            hasBalancedParentheses(sql),
            hasBalancedQuotes(sql),
            findKeywords(sql, this.sqlKeywords).size > 0
        ];
        return checks.filter(Boolean).length / checks.length;
    }

    /**
     * Weighted blend of query length, embedding similarity, completeness and syntax.
     * @param {string} expected
     * @param {string} actual
     * @param {number} similarity similarity factor, expected in [0, 1]
     * @param {number} completeness
     * @param {number} syntax
     * @returns {number}
     */
    calculateConfidence(expected, actual, similarity, completeness, syntax) {
        // Queries shorter than 200 characters contribute proportionally less.
        const lengthFactor = Math.min(1.0, Math.min(expected.length, actual.length) / 200);
        return (lengthFactor * 0.2 +
            similarity * 0.3 +
            completeness * 0.3 +
            syntax * 0.2);
    }

    /**
     * Collects the SQL keywords present in `sql` plus a `table:<name>` entry
     * for every identifier that directly follows FROM.
     * @param {string} sql
     * @returns {Set<string>}
     */
    extractSQLComponents(sql) {
        const components = findKeywords(sql, this.sqlKeywords);
        for (const tableName of collectFirstGroups(sql, /\bfrom\s+(\w+)/gi)) {
            components.add(`table:${tableName.toLowerCase()}`);
        }
        return components;
    }

    /**
     * Collects table-related keywords (substring match) and identifiers shaped
     * like `*_table`, `table_*`, `*_data` or `data_*`.
     * @param {string} text
     * @returns {Set<string>}
     */
    extractTableReferences(text) {
        const haystack = text.toLowerCase();
        const tables = new Set();
        for (const keyword of this.tableKeywords) {
            if (haystack.includes(keyword)) {
                tables.add(keyword);
            }
        }
        const tableMatches = haystack.match(/\b\w+_table\b|\btable_\w+\b|\b\w+_data\b|\bdata_\w+\b/gi);
        if (tableMatches) {
            tableMatches.forEach(match => tables.add(match.toLowerCase()));
        }
        return tables;
    }

    /**
     * Collects the business terms that occur in `text`. Matching is by
     * substring, so a term is also found inside a longer identifier.
     * @param {string} text
     * @returns {Set<string>}
     */
    extractBusinessTerms(text) {
        const haystack = text.toLowerCase();
        const terms = new Set();
        for (const term of this.businessTerms) {
            if (haystack.includes(term)) {
                terms.add(term);
            }
        }
        return terms;
    }

    /** Jaccard similarity of the FROM/JOIN table names of both queries. */
    calculateTablePresence(expected, actual) {
        return jaccardSimilarity(this.extractTableNames(expected), this.extractTableNames(actual));
    }

    /** Jaccard similarity of the plain column names in both SELECT lists. */
    calculateColumnPresence(expected, actual) {
        return jaccardSimilarity(this.extractColumnNames(expected), this.extractColumnNames(actual));
    }

    /**
     * Rewards use of joins, grouping, aggregate functions and ordering.
     * The raw points can total 4 across 3 checks, so the result is clamped to 1.
     * @param {string} sql
     * @returns {number} value in [0, 1]
     */
    calculateJoinAggregation(sql) {
        const totalChecks = 3;
        let score = 0;
        // Joins, with a bonus when a join condition is present.
        if (hasKeyword(sql, 'join')) {
            score += 1;
            if (hasKeyword(sql, 'on') || hasKeyword(sql, 'using')) {
                score += 0.5;
            }
        }
        // Grouping and aggregate functions.
        if (hasKeyword(sql, 'group by') || hasKeyword(sql, 'having')) {
            score += 1;
        }
        if (['count', 'sum', 'avg', 'min', 'max'].some(fn => hasKeyword(sql, fn))) {
            score += 1;
        }
        // Ordering.
        if (hasKeyword(sql, 'order by')) {
            score += 0.5;
        }
        return Math.min(1, score / totalChecks);
    }

    /**
     * Fraction of four checks passed: has both SELECT and FROM, balanced
     * parentheses, balanced quotes, and at least one known SQL keyword.
     * @param {string} sql
     * @returns {number} value in [0, 1]
     */
    calculateQueryParsability(sql) {
        const checks = [
            hasKeyword(sql, 'select') && hasKeyword(sql, 'from'),
            hasBalancedParentheses(sql),
            hasBalancedQuotes(sql),
            findKeywords(sql, this.sqlKeywords).size > 0
        ];
        return checks.filter(Boolean).length / checks.length;
    }

    /**
     * Levenshtein distance divided by the longer string's length.
     * @returns {number} 0 for identical strings (including two empty strings), up to 1
     */
    calculateQueryDifference(expected, actual) {
        const maxLength = Math.max(expected.length, actual.length);
        if (maxLength === 0) {
            return 0;
        }
        return this.levenshteinDistance(expected, actual) / maxLength;
    }

    /**
     * Coarse equivalence estimate: 1.0 when the token/parenthesis structure is
     * identical, 0.8 when both queries classify to the same type, 0.6 when
     * they share a table, otherwise 0.3. This is a simplified heuristic, not a
     * semantic comparison.
     */
    calculateSemanticEquivalence(expected, actual) {
        if (this.extractQueryStructure(expected) === this.extractQueryStructure(actual)) {
            return 1.0;
        }
        if (this.classifyQueryType(expected) === this.classifyQueryType(actual)) {
            return 0.8;
        }
        if (this.areQueriesSimilar(expected, actual)) {
            return 0.6;
        }
        return 0.3;
    }

    /**
     * Collects the lower-cased identifiers that directly follow FROM or JOIN.
     * @param {string} sql
     * @returns {Set<string>}
     */
    extractTableNames(sql) {
        const tables = new Set();
        const names = [
            ...collectFirstGroups(sql, /\bfrom\s+(\w+)/gi),
            ...collectFirstGroups(sql, /\bjoin\s+(\w+)/gi)
        ];
        for (const tableName of names) {
            tables.add(tableName.toLowerCase());
        }
        return tables;
    }

    /**
     * Collects plain column names from the first SELECT ... FROM list.
     * `*` and function-call expressions are skipped; a leading DISTINCT,
     * `AS alias` suffixes and a `table.` prefix are stripped.
     * @param {string} sql
     * @returns {Set<string>}
     */
    extractColumnNames(sql) {
        const columns = new Set();
        // [\s\S] instead of "." so that a SELECT list spanning lines still matches.
        const selectMatch = /\bselect\s+([\s\S]*?)\s+from\b/i.exec(sql);
        if (!selectMatch) {
            return columns;
        }
        for (const rawColumn of splitTopLevelCommas(selectMatch[1])) {
            const column = rawColumn.trim()
                .replace(/^distinct\s+/i, '')
                .replace(/\s+as\s+\w+/i, '')
                .replace(/^\w+\./, '')
                .trim()
                .toLowerCase();
            if (column && column !== '*' && !column.includes('(')) {
                columns.add(column);
            }
        }
        return columns;
    }

    /**
     * Levenshtein edit distance between two strings, computed with two
     * rolling rows instead of a full matrix.
     * @param {string} str1
     * @param {string} str2
     * @returns {number}
     */
    levenshteinDistance(str1, str2) {
        if (str1 === str2) {
            return 0;
        }
        if (str1.length === 0) {
            return str2.length;
        }
        if (str2.length === 0) {
            return str1.length;
        }
        let previous = Array.from({ length: str1.length + 1 }, (_, i) => i);
        let current = new Array(str1.length + 1).fill(0);
        for (let j = 1; j <= str2.length; j++) {
            current[0] = j;
            for (let i = 1; i <= str1.length; i++) {
                const indicator = str1[i - 1] === str2[j - 1] ? 0 : 1;
                current[i] = Math.min(current[i - 1] + 1, // deletion
                previous[i] + 1, // insertion
                previous[i - 1] + indicator // substitution
                );
            }
            // The finished row becomes the reference row for the next pass.
            [previous, current] = [current, previous];
        }
        return previous[str1.length];
    }

    /**
     * Reduces a query to its shape: every word becomes `T`, and everything
     * except `T`, whitespace and parentheses is dropped.
     * @param {string} sql
     * @returns {string}
     */
    extractQueryStructure(sql) {
        return sql.toLowerCase()
            .replace(/\w+/g, 'T') // Replace words with T (token)
            .replace(/[^T\s()]/g, '') // Remove non-token characters
            .replace(/\s+/g, ' ') // Normalize whitespace
            .trim();
    }

    /**
     * Classifies a query by the first matching feature.
     * @param {string} sql
     * @returns {string} 'aggregation', 'join', 'filtered', 'ordered' or 'simple'
     */
    classifyQueryType(sql) {
        if (hasKeyword(sql, 'count') || hasKeyword(sql, 'sum') || hasKeyword(sql, 'avg')) {
            return 'aggregation';
        }
        if (hasKeyword(sql, 'join')) {
            return 'join';
        }
        if (hasKeyword(sql, 'where') && hasKeyword(sql, 'and')) {
            return 'filtered';
        }
        if (hasKeyword(sql, 'order by')) {
            return 'ordered';
        }
        return 'simple';
    }

    /**
     * True when both queries classify to the same type or share at least one
     * FROM/JOIN table name.
     * @param {string} expected
     * @param {string} actual
     * @returns {boolean}
     */
    areQueriesSimilar(expected, actual) {
        if (this.classifyQueryType(expected) === this.classifyQueryType(actual)) {
            return true;
        }
        const expectedTables = this.extractTableNames(expected);
        const actualTables = this.extractTableNames(actual);
        return [...expectedTables].some(table => actualTables.has(table));
    }

    getName() {
        return 'SQL';
    }

    getDescription() {
        return 'Text-to-SQL evaluation metric that measures SQL accuracy, table relevance, semantic understanding, and query completeness for database query generation';
    }
}
moduleExports.SQLMetric = SQLMetric;
//# sourceMappingURL=sql-metric.js.map