UNPKG

semantic-ds-toolkit

Version:

Performance-first semantic layer for modern data stacks - Stable Column Anchors & intelligent inference

152 lines (151 loc) 6.41 kB
import { BaseSQLGenerator } from './base-generator';

/**
 * Mapping from the toolkit's logical data type names to Snowflake column types.
 * Stored in a Map (not an object literal) so that inherited keys such as
 * "constructor" or "toString" can never be returned by a lookup.
 */
const SNOWFLAKE_TYPE_MAP = new Map([
    ['string', 'VARCHAR'],
    ['int64', 'NUMBER(38,0)'],
    ['float64', 'FLOAT'],
    ['boolean', 'BOOLEAN'],
    ['datetime', 'TIMESTAMP_NTZ'],
    ['unknown', 'VARIANT']
]);

/**
 * SQL generator that emits Snowflake-flavoured statements and validation
 * predicates.
 *
 * Relies on `generateBaseValidationCase` and `escapeStringLiteral`, which are
 * not defined in this file and are presumably inherited from BaseSQLGenerator.
 */
export class SnowflakeSQLGenerator extends BaseSQLGenerator {
    /**
     * Adapts the rule's condition to Snowflake syntax and delegates to
     * `generateBaseValidationCase`.
     *
     * @param {object} rule - Validation rule; `rule.condition` is read here.
     * @returns {*} Whatever `generateBaseValidationCase` returns.
     */
    generateValidationCase(rule) {
        const adaptedCondition = this.adaptConditionForTarget(rule.condition);
        return this.generateBaseValidationCase(rule, adaptedCondition);
    }

    /**
     * @param {string} viewName - View name (escaped by this method).
     * @param {string} query - SELECT statement used as the view body.
     * @returns {string} A `CREATE OR REPLACE VIEW` statement.
     */
    generateCreateView(viewName, query) {
        return `CREATE OR REPLACE VIEW ${this.escapeIdentifier(viewName)} AS\n${query}`;
    }

    /**
     * @param {string} viewName - View name (escaped by this method).
     * @param {string} query - SELECT statement used as the view body.
     * @returns {string} A `CREATE OR REPLACE SECURE VIEW` statement.
     */
    generateSecureView(viewName, query) {
        return `CREATE OR REPLACE SECURE VIEW ${this.escapeIdentifier(viewName)} AS\n${query}`;
    }

    /**
     * @param {string} tableName - Table name (escaped by this method).
     * @param {string} query - SELECT statement whose result populates the table.
     * @returns {string} A `CREATE OR REPLACE TABLE ... AS` statement.
     */
    generateCreateTable(tableName, query) {
        return `CREATE OR REPLACE TABLE ${this.escapeIdentifier(tableName)} AS\n${query}`;
    }

    /**
     * Translates a logical data type name into a Snowflake column type.
     * The lookup is case-insensitive; anything unrecognised maps to VARCHAR.
     *
     * @param {string} dataType - Logical type name, e.g. "int64".
     * @returns {string} Snowflake type name.
     */
    adaptDataType(dataType) {
        // String() lets a missing or non-string type fall through to the
        // VARCHAR default instead of throwing on `.toLowerCase()`.
        const key = String(dataType).toLowerCase();
        return SNOWFLAKE_TYPE_MAP.get(key) || 'VARCHAR';
    }

    /**
     * Escapes an identifier for use in Snowflake SQL.
     *
     * Identifiers made only of upper-case letters, digits and underscores
     * (starting with a letter) are returned as-is; everything else is wrapped
     * in double quotes with embedded double quotes doubled.
     *
     * @param {string} identifier - Raw identifier.
     * @returns {string} Identifier safe to splice into a statement.
     */
    escapeIdentifier(identifier) {
        // Snowflake uses double quotes for case-sensitive identifiers
        if (/^[A-Z][A-Z0-9_]*$/.test(identifier)) {
            return identifier; // No need to quote standard identifiers
        }
        return `"${identifier.replace(/"/g, '""')}"`;
    }

    /**
     * @returns {string} SQL expression for the current timestamp.
     */
    formatTimestamp() {
        return 'CURRENT_TIMESTAMP()';
    }

    /**
     * @returns {boolean} Always true: this generator emits regex predicates.
     */
    supportsRegex() {
        return true;
    }

    /**
     * Builds an `RLIKE(column, pattern)` predicate.
     *
     * @param {string} column - Column expression, inserted verbatim.
     * @param {string} pattern - Regular expression; passed through
     *   `escapeStringLiteral` (presumably quotes and escapes it - defined
     *   outside this file).
     * @returns {string} RLIKE predicate.
     */
    getRegexFunction(column, pattern) {
        return `RLIKE(${column}, ${this.escapeStringLiteral(pattern)})`;
    }

    /**
     * Rewrites a generic SQL condition into Snowflake syntax:
     *  - `col ~ 'pattern'` becomes `RLIKE(col, ...)`,
     *  - `LENGTH(` becomes `LEN(`,
     *  - a bare `CURRENT_TIMESTAMP` becomes `CURRENT_TIMESTAMP()`.
     *
     * @param {string} condition - Source condition.
     * @returns {string} Adapted condition.
     */
    adaptConditionForTarget(condition) {
        return condition
            // Replace regex operator
            .replace(/(\w+)\s*~\s*'([^']+)'/g, (_match, column, pattern) => this.getRegexFunction(column, pattern))
            // Word boundary keeps OCTET_LENGTH( / BIT_LENGTH( intact; without it
            // they would turn into the non-existent OCTET_LEN( / BIT_LEN(.
            .replace(/\bLENGTH\(/g, 'LEN(')
            // Only add parentheses to a standalone CURRENT_TIMESTAMP that is not
            // already a call, so `CURRENT_TIMESTAMP()` does not become
            // `CURRENT_TIMESTAMP()()` and longer identifiers are left alone.
            .replace(/\bCURRENT_TIMESTAMP\b(?!\s*\()/g, 'CURRENT_TIMESTAMP()');
    }

    /**
     * @param {string} columnName - Column expression, inserted verbatim.
     * @returns {string} Predicate requiring an e-mail-shaped value of at most
     *   254 characters.
     */
    generateEmailValidation(columnName) {
        const emailPattern = '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,}$';
        return `${this.getRegexFunction(columnName, emailPattern)} AND LEN(${columnName}) <= 254`;
    }

    /**
     * @param {string} columnName - Column expression, inserted verbatim.
     * @returns {string} Predicate accepting any of the supported phone formats.
     */
    generatePhoneValidation(columnName) {
        // Multiple phone format patterns
        const patterns = [
            '^\\+?[1-9]\\d{1,14}$', // E.164 format
            '^\\([0-9]{3}\\)\\s[0-9]{3}-[0-9]{4}$', // US format: (555) 123-4567
            '^[0-9]{3}-[0-9]{3}-[0-9]{4}$' // US format: 555-123-4567
        ];
        const regexConditions = patterns
            .map((pattern) => this.getRegexFunction(columnName, pattern))
            .join(' OR ');
        // Upper bound is 16, not 15: the E.164 pattern above accepts a leading
        // "+" followed by up to 15 digits, which is 16 characters in total.
        return `(${regexConditions}) AND LEN(${columnName}) BETWEEN 7 AND 16`;
    }

    /**
     * @param {string} columnName - Column expression, inserted verbatim.
     * @returns {string} Predicate requiring a non-null, non-negative amount
     *   below 999999999999.99.
     */
    generateCurrencyValidation(columnName) {
        return `${columnName} >= 0 AND ${columnName} IS NOT NULL AND ${columnName} < 999999999999.99`;
    }

    /**
     * Builds a NOT NULL + range predicate for a timestamp column. When no
     * `maxDate` is given the value must not lie in the future.
     *
     * @param {string} columnName - Column expression, inserted verbatim.
     * @param {string} [minDate] - Inclusive lower bound.
     * @param {string} [maxDate] - Inclusive upper bound.
     * @returns {string} Conditions joined with AND.
     */
    generateDateRangeValidation(columnName, minDate, maxDate) {
        // NOTE(review): minDate/maxDate are interpolated into the literal as-is,
        // without escaping - callers must pass trusted values.
        const conditions = [`${columnName} IS NOT NULL`];
        if (minDate) {
            conditions.push(`${columnName} >= '${minDate}'::TIMESTAMP_NTZ`);
        }
        if (maxDate) {
            conditions.push(`${columnName} <= '${maxDate}'::TIMESTAMP_NTZ`);
        }
        else {
            // Default: not in future
            conditions.push(`${columnName} <= CURRENT_TIMESTAMP()`);
        }
        return conditions.join(' AND ');
    }

    /**
     * Dispatches on `rule.ruleType` to the matching specialised generator,
     * falling back to the adapted raw condition.
     *
     * @param {object} rule - Reads `ruleType`, `column` and `condition`.
     * @returns {string} SQL predicate for the rule.
     */
    generateCustomValidation(rule) {
        switch (rule.ruleType) {
            case 'null_check':
                return `${rule.column} IS NOT NULL`;
            case 'format_check':
                if (rule.condition.includes('email')) {
                    return this.generateEmailValidation(rule.column);
                }
                if (rule.condition.includes('phone')) {
                    return this.generatePhoneValidation(rule.column);
                }
                return this.adaptConditionForTarget(rule.condition);
            case 'range_check':
                if (rule.condition.includes('monetary') || rule.condition.includes('currency')) {
                    return this.generateCurrencyValidation(rule.column);
                }
                return this.adaptConditionForTarget(rule.condition);
            case 'foreign_key':
                return this.generateForeignKeyValidation(rule);
            default:
                return this.adaptConditionForTarget(rule.condition);
        }
    }

    /**
     * Rebuilds an `EXISTS (SELECT 1 FROM t WHERE a = b ...` condition with
     * escaped identifiers. Conditions that do not match that shape are passed
     * through `adaptConditionForTarget` instead.
     *
     * @param {object} rule - Reads `rule.condition`.
     * @returns {string} SQL predicate.
     */
    generateForeignKeyValidation(rule) {
        // Extract referenced table from condition
        const match = rule.condition.match(/EXISTS\s*\(\s*SELECT\s+1\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s*=\s*(\w+)/i);
        if (match) {
            const [, refTable, refColumn, column] = match;
            return `EXISTS (SELECT 1 FROM ${this.escapeIdentifier(refTable)} WHERE ${this.escapeIdentifier(refColumn)} = ${this.escapeIdentifier(column)})`;
        }
        return this.adaptConditionForTarget(rule.condition);
    }

    /**
     * Prepends advisory SQL comments to the query when more than one million
     * rows are expected; otherwise returns the query unchanged.
     *
     * @param {string} baseQuery - Query text.
     * @param {number} estimatedRows - Expected row count.
     * @returns {string} Query, possibly prefixed with comment lines.
     */
    generatePerformanceOptimizedQuery(baseQuery, estimatedRows) {
        let optimized = baseQuery;
        // Add query optimization hints for large datasets
        if (estimatedRows > 1000000) {
            optimized = `-- Performance optimized query for ${estimatedRows} rows\n${optimized}`;
            // Add clustering recommendation in comment
            optimized = `-- Consider clustering on validation columns\n${optimized}`;
        }
        return optimized;
    }

    /**
     * @returns {{secureViews: boolean, timeTravel: boolean, clustering: boolean, streamingSupport: boolean}}
     *   Feature flags reported by this generator (all true).
     */
    generateSnowflakeSpecificFeatures() {
        return {
            secureViews: true,
            timeTravel: true,
            clustering: true,
            streamingSupport: true
        };
    }

    /**
     * Emits a script that creates an append-only stream on a table plus a view
     * over that stream filtered to rows whose `_semantic_status` is 'PASS'.
     *
     * @param {string} tableName - Source table.
     * @param {string} [streamName] - Stream name; defaults to `<tableName>_stream`.
     * @returns {string} Two SQL statements separated by a comment line.
     */
    generateStreamingValidation(tableName, streamName) {
        const actualStreamName = streamName || `${tableName}_stream`;
        const escapedStream = this.escapeIdentifier(actualStreamName);
        const escapedTable = this.escapeIdentifier(tableName);
        const viewSuffix = '_streaming_validation';
        // When the table name had to be quoted, the suffix must go inside the
        // quotes: `"my table"_streaming_validation` is not valid SQL. Names that
        // need no quoting keep the previous (unquoted) view name.
        const escapedView = escapedTable === tableName
            ? `${tableName}${viewSuffix}`
            : this.escapeIdentifier(`${tableName}${viewSuffix}`);
        return [
            '',
            `CREATE OR REPLACE STREAM ${escapedStream}`,
            `ON TABLE ${escapedTable}`,
            'APPEND_ONLY = TRUE;',
            '',
            '-- Use stream for real-time validation',
            `CREATE OR REPLACE VIEW ${escapedView} AS`,
            'SELECT',
            '  *,',
            '  METADATA$ACTION as _change_type,',
            '  METADATA$ISUPDATE as _is_update,',
            '  CURRENT_TIMESTAMP() as _validation_timestamp',
            `FROM ${escapedStream}`,
            "WHERE _semantic_status = 'PASS';"
        ].join('\n');
    }
}
//# sourceMappingURL=snowflake.js.map