semantic-ds-toolkit
Version:
Performance-first semantic layer for modern data stacks - Stable Column Anchors & intelligent inference
152 lines (151 loc) • 6.41 kB
JavaScript
import { BaseSQLGenerator } from './base-generator';
/**
 * SQL generator that emits Snowflake-flavoured DDL and validation predicates.
 *
 * Relies on `BaseSQLGenerator` for `generateBaseValidationCase` and
 * `escapeStringLiteral`; everything else is defined here.
 */
export class SnowflakeSQLGenerator extends BaseSQLGenerator {
  /**
   * Build the validation CASE fragment for a rule after adapting its condition.
   * @param {object} rule - Validation rule; `rule.condition` is the raw condition string.
   * @returns {*} Whatever the base class' `generateBaseValidationCase` returns.
   */
  generateValidationCase(rule) {
    const adaptedCondition = this.adaptConditionForTarget(rule.condition);
    return this.generateBaseValidationCase(rule, adaptedCondition);
  }

  /**
   * @param {string} viewName - View name (escaped via `escapeIdentifier`).
   * @param {string} query - SELECT statement used as the view body.
   * @returns {string} `CREATE OR REPLACE VIEW` statement.
   */
  generateCreateView(viewName, query) {
    return `CREATE OR REPLACE VIEW ${this.escapeIdentifier(viewName)} AS\n${query}`;
  }

  /**
   * @param {string} viewName - View name (escaped via `escapeIdentifier`).
   * @param {string} query - SELECT statement used as the view body.
   * @returns {string} `CREATE OR REPLACE SECURE VIEW` statement.
   */
  generateSecureView(viewName, query) {
    return `CREATE OR REPLACE SECURE VIEW ${this.escapeIdentifier(viewName)} AS\n${query}`;
  }

  /**
   * @param {string} tableName - Table name (escaped via `escapeIdentifier`).
   * @param {string} query - SELECT statement that populates the table.
   * @returns {string} `CREATE OR REPLACE TABLE ... AS` statement.
   */
  generateCreateTable(tableName, query) {
    return `CREATE OR REPLACE TABLE ${this.escapeIdentifier(tableName)} AS\n${query}`;
  }

  /**
   * Map a logical data type name to the column type emitted for Snowflake.
   * @param {string} dataType - Logical type name; matched case-insensitively.
   * @returns {string} Mapped type, or `'VARCHAR'` for anything unrecognised
   *   (including null/undefined).
   */
  adaptDataType(dataType) {
    // A Map is used instead of an object literal so that names such as
    // "constructor" or "toString" cannot resolve to inherited members.
    const typeMap = new Map([
      ['string', 'VARCHAR'],
      ['int64', 'NUMBER(38,0)'],
      ['float64', 'FLOAT'],
      ['boolean', 'BOOLEAN'],
      ['datetime', 'TIMESTAMP_NTZ'],
      ['unknown', 'VARIANT'],
    ]);
    if (dataType == null) {
      return 'VARCHAR';
    }
    const mapped = typeMap.get(String(dataType).toLowerCase());
    return mapped === undefined ? 'VARCHAR' : mapped;
  }

  /**
   * Quote an identifier unless it is a plain upper-case name.
   * @param {string} identifier - Raw identifier.
   * @returns {string} The identifier unchanged when it matches
   *   `[A-Z][A-Z0-9_]*`, otherwise wrapped in double quotes with embedded
   *   double quotes doubled.
   */
  escapeIdentifier(identifier) {
    // Snowflake uses double quotes for case-sensitive identifiers
    if (/^[A-Z][A-Z0-9_]*$/.test(identifier)) {
      return identifier; // No need to quote standard identifiers
    }
    return `"${identifier.replace(/"/g, '""')}"`;
  }

  /**
   * @returns {string} Expression used for "now" in generated SQL.
   */
  formatTimestamp() {
    return 'CURRENT_TIMESTAMP()';
  }

  /**
   * @returns {boolean} Always `true`; regex predicates are emitted via `RLIKE`.
   */
  supportsRegex() {
    return true;
  }

  /**
   * @param {string} column - Column expression, inserted verbatim.
   * @param {string} pattern - Regex pattern; passed through `escapeStringLiteral`.
   * @returns {string} `RLIKE(column, <literal>)` expression.
   */
  getRegexFunction(column, pattern) {
    return `RLIKE(${column}, ${this.escapeStringLiteral(pattern)})`;
  }

  /**
   * Rewrite a generic condition string into the form this generator emits.
   *
   * - `col ~ 'pattern'` becomes an `RLIKE(...)` call (qualified names such as
   *   `t.col` are kept whole).
   * - `LENGTH(` becomes `LEN(`.
   * - A bare `CURRENT_TIMESTAMP` gains call parentheses.
   *
   * @param {string} condition - Raw condition text.
   * @returns {string} Adapted condition.
   */
  adaptConditionForTarget(condition) {
    let adapted = condition;
    // Replace regex operator. The column capture accepts dotted names so that
    // `t.email ~ '...'` does not turn into `t.RLIKE(email, ...)`.
    adapted = adapted.replace(
      /((?:\w+\.)*\w+)\s*~\s*'([^']+)'/g,
      (match, column, pattern) => this.getRegexFunction(column, pattern),
    );
    // Snowflake-specific adaptations.
    // \b keeps names that merely end in LENGTH (e.g. OCTET_LENGTH) intact.
    adapted = adapted.replace(/\bLENGTH\(/g, 'LEN(');
    // Skip occurrences that are already followed by "(" so an existing
    // CURRENT_TIMESTAMP() is not turned into CURRENT_TIMESTAMP()().
    adapted = adapted.replace(/\bCURRENT_TIMESTAMP\b(?!\s*\()/g, 'CURRENT_TIMESTAMP()');
    return adapted;
  }

  /**
   * @param {string} columnName - Column expression, inserted verbatim.
   * @returns {string} Predicate combining an e-mail regex with a 254-character cap.
   */
  generateEmailValidation(columnName) {
    const emailPattern = '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,}$';
    return `${this.getRegexFunction(columnName, emailPattern)} AND LEN(${columnName}) <= 254`;
  }

  /**
   * @param {string} columnName - Column expression, inserted verbatim.
   * @returns {string} Predicate accepting any of the supported phone formats,
   *   bounded by an overall length check.
   */
  generatePhoneValidation(columnName) {
    // Multiple phone format patterns
    const patterns = [
      '^\\+?[1-9]\\d{1,14}$', // E.164 format
      '^\\([0-9]{3}\\)\\s[0-9]{3}-[0-9]{4}$', // US format: (555) 123-4567
      '^[0-9]{3}-[0-9]{3}-[0-9]{4}$', // US format: 555-123-4567
    ];
    const regexConditions = patterns
      .map((pattern) => this.getRegexFunction(columnName, pattern))
      .join(' OR ');
    // Upper bound is 16, not 15: the E.164 pattern above admits a leading "+"
    // followed by up to 15 digits, which is 16 characters in total.
    return `(${regexConditions}) AND LEN(${columnName}) BETWEEN 7 AND 16`;
  }

  /**
   * @param {string} columnName - Column expression, inserted verbatim.
   * @returns {string} Predicate requiring a non-null, non-negative amount below 999999999999.99.
   */
  generateCurrencyValidation(columnName) {
    return `${columnName} >= 0 AND ${columnName} IS NOT NULL AND ${columnName} < 999999999999.99`;
  }

  /**
   * Build a non-null + date-range predicate.
   * @param {string} columnName - Column expression, inserted verbatim.
   * @param {string} [minDate] - Inclusive lower bound; omitted when falsy.
   * @param {string} [maxDate] - Inclusive upper bound; when falsy the column is
   *   instead required to be no later than `CURRENT_TIMESTAMP()`.
   * @returns {string} Conditions joined with ` AND `.
   */
  generateDateRangeValidation(columnName, minDate, maxDate) {
    // Bounds end up inside a single-quoted literal, so embedded quotes are
    // doubled to keep them from terminating the literal early.
    const toTimestampLiteral = (value) => `'${String(value).replace(/'/g, "''")}'::TIMESTAMP_NTZ`;
    const conditions = [`${columnName} IS NOT NULL`];
    if (minDate) {
      conditions.push(`${columnName} >= ${toTimestampLiteral(minDate)}`);
    }
    if (maxDate) {
      conditions.push(`${columnName} <= ${toTimestampLiteral(maxDate)}`);
    } else {
      // Default: not in future
      conditions.push(`${columnName} <= CURRENT_TIMESTAMP()`);
    }
    return conditions.join(' AND ');
  }

  /**
   * Dispatch a rule to the matching specialised generator based on `rule.ruleType`
   * and keywords found in `rule.condition`.
   * @param {object} rule - Rule with `ruleType`, `column` and `condition`.
   * @returns {string} SQL predicate for the rule.
   */
  generateCustomValidation(rule) {
    switch (rule.ruleType) {
      case 'null_check':
        return `${rule.column} IS NOT NULL`;
      case 'format_check':
        if (rule.condition.includes('email')) {
          return this.generateEmailValidation(rule.column);
        }
        if (rule.condition.includes('phone')) {
          return this.generatePhoneValidation(rule.column);
        }
        return this.adaptConditionForTarget(rule.condition);
      case 'range_check':
        if (rule.condition.includes('monetary') || rule.condition.includes('currency')) {
          return this.generateCurrencyValidation(rule.column);
        }
        return this.adaptConditionForTarget(rule.condition);
      case 'foreign_key':
        return this.generateForeignKeyValidation(rule);
      default:
        return this.adaptConditionForTarget(rule.condition);
    }
  }

  /**
   * Rebuild an `EXISTS (SELECT 1 FROM t WHERE a = b ...` condition with escaped
   * identifiers; any other condition shape is passed to `adaptConditionForTarget`.
   * @param {object} rule - Rule whose `condition` holds the foreign-key check.
   * @returns {string} SQL predicate.
   */
  generateForeignKeyValidation(rule) {
    // Extract referenced table from condition
    const match = rule.condition.match(/EXISTS\s*\(\s*SELECT\s+1\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s*=\s*(\w+)/i);
    if (match) {
      // NOTE(review): only the three captured names survive; anything after the
      // first equality in the original condition is dropped - confirm intended.
      const [, refTable, refColumn, column] = match;
      return `EXISTS (SELECT 1 FROM ${this.escapeIdentifier(refTable)} WHERE ${this.escapeIdentifier(refColumn)} = ${this.escapeIdentifier(column)})`;
    }
    return this.adaptConditionForTarget(rule.condition);
  }

  /**
   * Prefix advisory SQL comments to queries expected to scan more than one million rows.
   * @param {string} baseQuery - Query text.
   * @param {number} estimatedRows - Estimated row count.
   * @returns {string} The query, unchanged or with two leading comment lines.
   */
  generatePerformanceOptimizedQuery(baseQuery, estimatedRows) {
    let optimized = baseQuery;
    // Add query optimization hints for large datasets
    if (estimatedRows > 1000000) {
      optimized = `-- Performance optimized query for ${estimatedRows} rows\n${optimized}`;
      // Add clustering recommendation in comment
      optimized = `-- Consider clustering on validation columns\n${optimized}`;
    }
    return optimized;
  }

  /**
   * @returns {{secureViews: boolean, timeTravel: boolean, clustering: boolean, streamingSupport: boolean}}
   *   Feature flags reported by this generator (all `true`).
   */
  generateSnowflakeSpecificFeatures() {
    return {
      secureViews: true,
      timeTravel: true,
      clustering: true,
      streamingSupport: true,
    };
  }

  /**
   * Emit a stream on `tableName` plus a view that selects passing rows from it.
   * @param {string} tableName - Source table.
   * @param {string} [streamName] - Stream name; defaults to `<tableName>_stream`.
   * @returns {string} Two SQL statements separated by newlines.
   */
  generateStreamingValidation(tableName, streamName) {
    const actualStreamName = streamName || `${tableName}_stream`;
    const escapedTable = this.escapeIdentifier(tableName);
    const escapedStream = this.escapeIdentifier(actualStreamName);
    // Appending the suffix after an already-quoted identifier would yield
    // `"name"_streaming_validation`, so in that case the suffix is added
    // before escaping. Unquoted names keep the previous output unchanged.
    const viewIdentifier = escapedTable.startsWith('"')
      ? this.escapeIdentifier(`${tableName}_streaming_validation`)
      : `${escapedTable}_streaming_validation`;
    return [
      '',
      `CREATE OR REPLACE STREAM ${escapedStream}`,
      `ON TABLE ${escapedTable}`,
      'APPEND_ONLY = TRUE;',
      '-- Use stream for real-time validation',
      `CREATE OR REPLACE VIEW ${viewIdentifier} AS`,
      'SELECT *,',
      'METADATA$ACTION as _change_type,',
      'METADATA$ISUPDATE as _is_update,',
      'CURRENT_TIMESTAMP() as _validation_timestamp',
      `FROM ${escapedStream}`,
      "WHERE _semantic_status = 'PASS';",
    ].join('\n');
  }
}
//# sourceMappingURL=snowflake.js.map