UNPKG

aiwg

Version:

Deployment tool and support utility for AI context. Copies agents, skills, commands, rules, and behaviors into the paths each AI platform reads (Claude Code, Codex, Copilot, Cursor, Warp, OpenClaw, and 6 more) so one source of truth works across 10 platfo

174 lines 6.04 kB
/** * Feedback A/B Testing Framework * * Provides significance testing for comparing feedback strategies * using Welch's t-test and Cohen's d effect size calculations. * * @module quality/feedback-ab * @issue #148 */ // ============================================================================ // A/B Testing Engine // ============================================================================ export function runABTest(control, treatment, config = {}) { const alpha = config.alpha ?? 0.05; const minSamples = config.minSamples ?? 10; if (control.deltas.length < minSamples || treatment.deltas.length < minSamples) { throw new Error(`Insufficient samples: control=${control.deltas.length}, treatment=${treatment.deltas.length}, minimum=${minSamples}`); } const controlMean = mean(control.deltas); const treatmentMean = mean(treatment.deltas); const controlStdDev = stdDev(control.deltas); const treatmentStdDev = stdDev(treatment.deltas); const controlN = control.deltas.length; const treatmentN = treatment.deltas.length; const { tStatistic, degreesOfFreedom } = welchTTest(controlMean, treatmentMean, controlStdDev, treatmentStdDev, controlN, treatmentN); const pValue = tTestPValue(Math.abs(tStatistic), degreesOfFreedom); const pooledStdDev = Math.sqrt(((controlN - 1) * controlStdDev ** 2 + (treatmentN - 1) * treatmentStdDev ** 2) / (controlN + treatmentN - 2)); const effectSize = pooledStdDev > 0 ? (treatmentMean - controlMean) / pooledStdDev : 0; return { controlName: control.name, treatmentName: treatment.name, controlMean: round(controlMean), treatmentMean: round(treatmentMean), controlStdDev: round(controlStdDev), treatmentStdDev: round(treatmentStdDev), tStatistic: round(tStatistic), degreesOfFreedom: Math.round(degreesOfFreedom), pValue: round(pValue, 4), significant: pValue < alpha, alpha, effectSize: round(effectSize), effectInterpretation: interpretEffectSize(Math.abs(effectSize)), controlN, treatmentN, }; } // ============================================================================ // Statistical Helpers // ============================================================================ export function mean(values) { if (values.length === 0) return 0; return values.reduce((sum, v) => sum + v, 0) / values.length; } export function stdDev(values) { if (values.length < 2) return 0; const m = mean(values); const variance = values.reduce((sum, v) => sum + (v - m) ** 2, 0) / (values.length - 1); return Math.sqrt(variance); } export function welchTTest(mean1, mean2, sd1, sd2, n1, n2) { const se1 = (sd1 ** 2) / n1; const se2 = (sd2 ** 2) / n2; const seDiff = Math.sqrt(se1 + se2); const tStatistic = seDiff > 0 ? (mean1 - mean2) / seDiff : 0; const numerator = (se1 + se2) ** 2; const denominator = (se1 ** 2) / (n1 - 1) + (se2 ** 2) / (n2 - 1); const degreesOfFreedom = denominator > 0 ? numerator / denominator : 1; return { tStatistic, degreesOfFreedom }; } export function tTestPValue(tAbs, df) { if (df > 1000) { return 2 * normalCDF(-tAbs); } const x = df / (df + tAbs ** 2); const p = incompleteBeta(x, df / 2, 0.5); return Math.min(1, Math.max(0, p)); } function normalCDF(x) { const a1 = 0.254829592; const a2 = -0.284496736; const a3 = 1.421413741; const a4 = -1.453152027; const a5 = 1.061405429; const p = 0.3275911; const sign = x < 0 ? -1 : 1; const absX = Math.abs(x); const t = 1.0 / (1.0 + p * absX); const y = 1.0 - ((((a5 * t + a4) * t + a3) * t + a2) * t + a1) * t * Math.exp(-absX * absX / 2); return 0.5 * (1.0 + sign * y); } function incompleteBeta(x, a, b) { if (x <= 0) return 0; if (x >= 1) return 1; const lnBeta = lnGamma(a) + lnGamma(b) - lnGamma(a + b); const front = Math.exp(Math.log(x) * a + Math.log(1 - x) * b - lnBeta) / a; let f = 1; let c = 1; let d = 1 - (a + b) * x / (a + 1); if (Math.abs(d) < 1e-30) d = 1e-30; d = 1 / d; f = d; for (let m = 1; m <= 200; m++) { let numerator = m * (b - m) * x / ((a + 2 * m - 1) * (a + 2 * m)); d = 1 + numerator * d; if (Math.abs(d) < 1e-30) d = 1e-30; c = 1 + numerator / c; if (Math.abs(c) < 1e-30) c = 1e-30; d = 1 / d; f *= c * d; numerator = -(a + m) * (a + b + m) * x / ((a + 2 * m) * (a + 2 * m + 1)); d = 1 + numerator * d; if (Math.abs(d) < 1e-30) d = 1e-30; c = 1 + numerator / c; if (Math.abs(c) < 1e-30) c = 1e-30; d = 1 / d; const delta = c * d; f *= delta; if (Math.abs(delta - 1.0) < 1e-10) break; } return front * f; } function lnGamma(z) { if (z <= 0) return 0; const g = 7; const c = [ 0.99999999999980993, 676.5203681218851, -1259.1392167224028, 771.32342877765313, -176.61502916214059, 12.507343278686905, -0.13857109526572012, 9.9843695780195716e-6, 1.5056327351493116e-7, ]; if (z < 0.5) { return Math.log(Math.PI / Math.sin(Math.PI * z)) - lnGamma(1 - z); } z -= 1; let x = c[0]; for (let i = 1; i < g + 2; i++) { x += c[i] / (z + i); } const t = z + g + 0.5; return 0.5 * Math.log(2 * Math.PI) + (z + 0.5) * Math.log(t) - t + Math.log(x); } export function interpretEffectSize(d) { const absD = Math.abs(d); if (absD < 0.2) return 'negligible'; if (absD < 0.5) return 'small'; if (absD < 0.8) return 'medium'; return 'large'; } function round(value, decimals = 2) { const factor = 10 ** decimals; return Math.round(value * factor) / factor; } //# sourceMappingURL=feedback-ab.js.map