UNPKG

semantic-ds-toolkit

Version:

Performance-first semantic layer for modern data stacks - Stable Column Anchors & intelligent inference

103 lines 3.93 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
const fs_1 = require("fs");
const path_1 = require("path");
const drift_1 = require("../drift");

/**
 * Create a directory (and any missing parents) on a best-effort basis.
 *
 * Failures are reported as a warning instead of being thrown, so the caller
 * keeps running; a later write into the directory will surface the real error.
 *
 * @param {string} path - Directory to create.
 * @returns {Promise<void>}
 */
async function ensureDir(path) {
    try {
        await fs_1.promises.mkdir(path, { recursive: true });
    }
    catch (err) {
        // With `recursive: true` an already-existing directory is not an error,
        // so anything caught here is a genuine failure worth reporting.
        console.warn(`drift-monitor: could not create directory "${path}":`, err);
    }
}

/**
 * @returns {string} The current time as an ISO-8601 string.
 */
function nowIso() {
    return new Date().toISOString();
}

/**
 * Generate normally distributed pseudo-random numbers via the Box-Muller transform.
 *
 * @param {number} [size=10000] - Number of samples to generate.
 * @param {number} [mean=0] - Mean of the distribution.
 * @param {number} [std=1] - Standard deviation of the distribution.
 * @returns {number[]} Array of `size` samples.
 */
function synthNumeric(size = 10000, mean = 0, std = 1) {
    const values = [];
    for (let i = 0; i < size; i++) {
        // Box-Muller. Math.random() is in [0, 1); flipping it to (0, 1] keeps
        // Math.log(u) finite, so no sample can come out as Infinity.
        const u = 1 - Math.random();
        const v = Math.random();
        const z = Math.sqrt(-2 * Math.log(u)) * Math.cos(2 * Math.PI * v);
        values.push(mean + std * z);
    }
    return values;
}

/**
 * Build a fingerprint object describing a numeric column.
 *
 * @param {number[]} values - Column values.
 * @returns {object} Fingerprint with dtype, cardinality, ratios, up to 20
 *   stringified sample values, and min/max. For an empty array min/max are
 *   Infinity/-Infinity and unique_ratio is 0.
 */
function fingerprintFromNumeric(values) {
    // Single pass instead of Math.min(...values): spreading a very large array
    // into call arguments can exceed the engine's argument limit.
    let min = Infinity;
    let max = -Infinity;
    for (const value of values) {
        min = Math.min(min, value);
        max = Math.max(max, value);
    }
    const unique = new Set(values);
    return {
        dtype: 'float64',
        cardinality: unique.size,
        regex_patterns: [],
        null_ratio: 0,
        // Guard the empty case so the ratio is 0 rather than NaN (0 / 0).
        unique_ratio: values.length === 0 ? 0 : unique.size / values.length,
        sample_values: values.slice(0, 20).map(String),
        min,
        max,
    };
}

/**
 * Build an anchor record for a dataset column from a fingerprint.
 *
 * @param {string} dataset - Dataset name.
 * @param {string} name - Column name.
 * @param {object} fp - Fingerprint; stored on the anchor as a JSON string.
 * @returns {object} Anchor with id `${dataset}:${name}`, identical
 *   first_seen/last_seen timestamps and a fixed confidence of 0.95.
 */
function anchorFromFingerprint(dataset, name, fp) {
    const ts = nowIso();
    return {
        dataset,
        column_name: name,
        anchor_id: `${dataset}:${name}`,
        fingerprint: JSON.stringify(fp),
        first_seen: ts,
        last_seen: ts,
        confidence: 0.95,
    };
}

/**
 * Run drift detection over a set of scenarios, write the results as JSON into
 * `evidence-logs/`, and print a short summary to the console.
 *
 * Scenarios come from `test-data/drift-scenarios.json` when that file holds a
 * non-empty JSON array; otherwise two synthetic scenarios are generated.
 *
 * @returns {Promise<void>}
 */
async function main() {
    const outDir = 'evidence-logs';
    await ensureDir(outDir);

    // Attempt to load baseline/current from local JSON if available
    // Fallback to synthetic data demonstrating both stable and drifted scenarios
    let scenarios = [];
    try {
        const raw = await fs_1.promises.readFile('test-data/drift-scenarios.json', 'utf8');
        const parsed = JSON.parse(raw);
        if (Array.isArray(parsed) && parsed.length > 0) {
            scenarios = parsed;
        }
    }
    catch (err) {
        // A missing file is the expected fallback path; anything else
        // (e.g. malformed JSON) is reported before falling back.
        if (err?.code !== 'ENOENT') {
            console.warn('drift-monitor: ignoring test-data/drift-scenarios.json:', err);
        }
    }

    if (scenarios.length === 0) {
        // Synthetic: stable and drifted numeric columns
        const base = synthNumeric(50000, 100, 10);
        const baseFp = fingerprintFromNumeric(base);
        const baseAnchor = anchorFromFingerprint('synthetic', 'metric_stable', baseFp);
        const baseColumn = { name: 'metric_stable', values: base, data_type: 'float64' };

        const drifted = synthNumeric(50000, 1000, 10); // mean shift 10x
        const driftFp = fingerprintFromNumeric(drifted);
        // NOTE(review): the drifted anchor carries the *baseline* fingerprint
        // while driftFp is passed as the scenario fingerprint -- presumably so
        // the detector compares baseline against drifted data; confirm.
        const driftAnchor = anchorFromFingerprint('synthetic', 'metric_drifted', baseFp);
        const driftColumn = { name: 'metric_drifted', values: drifted, data_type: 'float64' };

        scenarios.push(
            { name: 'stable', anchor: baseAnchor, column: baseColumn, fingerprint: baseFp },
            { name: 'distribution_drift', anchor: driftAnchor, column: driftColumn, fingerprint: driftFp },
        );
    }

    const detector = new drift_1.PerformanceOptimizedDriftDetector({ enable_performance_mode: true });
    const results = [];
    // Scenarios are awaited one at a time, in order.
    for (const s of scenarios) {
        const res = await detector.detectDriftFast(s.anchor, s.column, s.fingerprint);
        results.push({ scenario: s.name, result: res });
    }

    // One timestamp for both the file name and the payload so they agree.
    const generatedAt = nowIso();
    // ':' is replaced because it is not a valid file-name character everywhere.
    const file = (0, path_1.join)(outDir, `drift-monitor-${generatedAt.replace(/:/g, '-')}.json`);
    await fs_1.promises.writeFile(file, JSON.stringify({ generated_at: generatedAt, results }, null, 2), 'utf8');

    // Console summary for logs
    const summary = results.map(r => ({
        scenario: r.scenario,
        drift: r.result.drift_detected,
        severity: r.result.severity,
        types: r.result.drift_types.map((d) => d.type),
        time_ms: r.result.performance_metrics?.detection_time_ms,
    }));
    console.log('Drift monitor summary:', summary);
    console.log('Saved:', file);
}

main().catch(err => {
    console.error('drift-monitor error', err);
    process.exit(1);
});
//# sourceMappingURL=drift-monitor.js.map