UNPKG

semantic-ds-toolkit

Version:

Performance-first semantic layer for modern data stacks - Stable Column Anchors & intelligent inference

87 lines 3.04 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); const drift_1 = require("../drift"); function nowIso() { return new Date().toISOString(); } function synthNormal(n, mean = 0, std = 1) { const out = []; for (let i = 0; i < n; i++) { const u = Math.random(); const v = Math.random(); const z = Math.sqrt(-2 * Math.log(u)) * Math.cos(2 * Math.PI * v); out.push(mean + std * z); } return out; } function fpFrom(values) { const min = Math.min(...values); const max = Math.max(...values); const uniq = new Set(values); return { dtype: 'float64', cardinality: uniq.size, regex_patterns: [], null_ratio: 0, unique_ratio: uniq.size / values.length, sample_values: values.slice(0, 20).map(String), min, max, }; } function anchorFor(name, fp) { const ts = nowIso(); return { dataset: 'eval', column_name: name, anchor_id: `eval:${name}`, fingerprint: JSON.stringify(fp), first_seen: ts, last_seen: ts, confidence: 0.95, }; } async function evalNumericTrials(trials = 50) { const detector = new drift_1.DriftDetector({ enable_performance_mode: true }); let fpCount = 0; // false positive let tpCount = 0; // true positive // False positives: same distribution for (let i = 0; i < trials; i++) { const base = synthNormal(20000, 100, 15); const cur = synthNormal(20000, 100, 15); const baseFp = fpFrom(base); const curFp = fpFrom(cur); const anchor = anchorFor('stable_metric', baseFp); const col = { name: 'stable_metric', values: cur, data_type: 'float64' }; const res = await detector.detectDrift(anchor, col, curFp); if (res.drift_detected) fpCount++; } // True positives: significant mean shift for (let i = 0; i < trials; i++) { const base = synthNormal(20000, 50, 10); const cur = synthNormal(20000, 200, 10); const baseFp = fpFrom(base); const curFp = fpFrom(cur); const anchor = anchorFor('mean_shift_metric', baseFp); const col = { name: 'mean_shift_metric', values: cur, data_type: 'float64' }; const res = await detector.detectDrift(anchor, col, curFp); if (res.drift_detected) tpCount++; } return { trials, false_positive_rate: fpCount / trials, true_positive_rate: tpCount / trials, }; } async function main() { const stats = new drift_1.StatisticalTests(); // Quick check of stats utils const ks = stats.kolmogorovSmirnovTest([1, 2, 3, 4], [10, 11, 12, 13]); console.log('KS sanity:', { statistic: ks.statistic.toFixed(4), p: ks.p_value.toFixed(4) }); const numeric = await evalNumericTrials(30); console.log('Drift eval summary:', numeric); } main().catch(err => { console.error('drift-eval error', err); process.exit(1); }); //# sourceMappingURL=drift-eval.js.map