semantic-ds-toolkit
Version:
Performance-first semantic layer for modern data stacks - Stable Column Anchors & intelligent inference
95 lines • 3.23 kB
TypeScript
/**
 * Result object returned by `StatisticalTests.kolmogorovSmirnovTest`
 * (and, element-wise, by `StatisticalTests.batchKSTest`).
 */
export interface KolmogorovSmirnovResult {
    /** The Kolmogorov-Smirnov test statistic. */
    statistic: number;
    /** p-value associated with `statistic`. */
    p_value: number;
    /**
     * Critical value reported alongside the statistic.
     * NOTE(review): presumably derived from the `alpha` argument and the
     * sample sizes — confirm against the implementation.
     */
    critical_value: number;
    /**
     * Whether the test flagged the difference as significant.
     * NOTE(review): the exact decision rule (statistic vs. critical value,
     * or p-value vs. alpha) is not visible in this declaration.
     */
    is_significant: boolean;
}
/**
 * Detailed Population Stability Index (PSI) outcome, as returned by
 * `StatisticalTests.calculatePSIDetailed`.
 */
export interface PopulationStabilityResult {
    /** Overall PSI value. */
    psi_score: number;
    /**
     * Qualitative label assigned to the shift.
     * NOTE(review): the score cut-offs are not visible in this declaration —
     * presumably held in `StatisticalTests.PSI_THRESHOLDS`; confirm.
     */
    stability_category:
        | 'stable'
        | 'minor_shift'
        | 'major_shift'
        | 'significant_shift';
    /** Per-bin breakdown, one entry per bin. */
    bin_contributions: {
        /** Label identifying the bin. */
        bin: string;
        /**
         * Share of the expected dataset falling in this bin.
         * NOTE(review): fraction vs. percentage scale is not visible here.
         */
        expected_pct: number;
        /** Share of the actual dataset falling in this bin (same scale as `expected_pct`). */
        actual_pct: number;
        /** This bin's contribution to the PSI. */
        contribution: number;
    }[];
}
/**
 * Result object returned by `StatisticalTests.chiSquareTest`.
 */
export interface ChiSquareResult {
    /** The chi-square test statistic. */
    statistic: number;
    /** p-value associated with `statistic`. */
    p_value: number;
    /**
     * Degrees of freedom used for the test.
     * NOTE(review): presumably (number of categories - 1) — confirm against
     * the implementation.
     */
    degrees_of_freedom: number;
    /**
     * Whether the test flagged the difference as significant.
     * NOTE(review): presumably judged against the `alpha` argument — confirm.
     */
    is_significant: boolean;
}
/**
 * Collection of statistical tests and distance measures for comparing two
 * numeric distributions (KS, PSI, chi-square, Anderson-Darling, Wasserstein).
 *
 * This is an ambient declaration: only signatures are visible here. Default
 * values of optional parameters and all numeric details live in the
 * implementation file.
 */
export declare class StatisticalTests {
    /**
     * NOTE(review): presumably the bin count used when a `bins` argument is
     * omitted — type and value are not visible in this declaration.
     */
    private readonly DEFAULT_BINS;
    /**
     * NOTE(review): presumably the PSI cut-offs behind
     * `PopulationStabilityResult.stability_category` — confirm.
     */
    private readonly PSI_THRESHOLDS;
    /**
     * Kolmogorov-Smirnov test for distribution comparison
     * Tests whether two datasets come from the same distribution
     *
     * @param sample1 First numeric sample.
     * @param sample2 Second numeric sample.
     * @param alpha Optional; presumably the significance level. Its default
     *   is not visible in this declaration.
     * @returns Statistic, p-value, critical value and significance flag.
     */
    kolmogorovSmirnovTest(sample1: number[], sample2: number[], alpha?: number): KolmogorovSmirnovResult;
    /**
     * Population Stability Index (PSI) calculation
     * Measures distribution shift between expected and actual datasets
     *
     * @param expected Values of the expected dataset.
     * @param actual Values of the actual dataset.
     * @param bins Optional; presumably the number of bins. Its default is
     *   not visible in this declaration.
     * @returns The PSI as a single number.
     */
    populationStabilityIndex(expected: number[], actual: number[], bins?: number): number;
    /**
     * Detailed PSI calculation with bin-level analysis
     *
     * @param expected Values of the expected dataset.
     * @param actual Values of the actual dataset.
     * @param bins Optional; presumably the number of bins. Its default is
     *   not visible in this declaration.
     * @returns PSI score, stability category and per-bin contributions.
     */
    calculatePSIDetailed(expected: number[], actual: number[], bins?: number): PopulationStabilityResult;
    /**
     * Chi-square test for categorical distribution comparison
     *
     * @param observed Observed values.
     * @param expected Expected values.
     *   NOTE(review): presumably per-category counts aligned by index with
     *   `observed` — confirm against the implementation.
     * @param alpha Optional; presumably the significance level. Its default
     *   is not visible in this declaration.
     * @returns Statistic, p-value, degrees of freedom and significance flag.
     */
    chiSquareTest(observed: number[], expected: number[], alpha?: number): ChiSquareResult;
    /**
     * Anderson-Darling test for distribution comparison
     * More sensitive to tail differences than KS test
     *
     * Unlike the KS and chi-square methods, this signature takes no `alpha`
     * and the result carries no p-value; how significance is decided is not
     * visible in this declaration.
     *
     * @param sample1 First numeric sample.
     * @param sample2 Second numeric sample.
     * @returns The test statistic and a significance flag.
     */
    andersonDarlingTest(sample1: number[], sample2: number[]): {
        statistic: number;
        is_significant: boolean;
    };
    /**
     * Wasserstein (Earth Mover's) distance between two distributions
     *
     * @param sample1 First numeric sample.
     * @param sample2 Second numeric sample.
     * @returns The distance as a single number.
     */
    wassersteinDistance(sample1: number[], sample2: number[]): number;
    /**
     * Performance-optimized PSI for large datasets
     *
     * @param expected Values of the expected dataset.
     * @param actual Values of the actual dataset.
     * @param sampleSize Optional.
     *   NOTE(review): presumably the inputs are subsampled to this size
     *   before computing PSI — confirm; default not visible here.
     * @returns The PSI as a single number.
     */
    fastPSI(expected: number[], actual: number[], sampleSize?: number): number;
    // Private helpers. TypeScript declaration emit strips the types and
    // signatures of private members, so only their names appear below.
    private countLessEqual;
    private countEqual;
    private calculateBinBoundaries;
    private binData;
    private getKSCriticalValue;
    private calculateKSPValue;
    private calculateChiSquarePValue;
    private normalCDF;
    private erf;
    private incompleteGamma;
    private gamma;
    private sampleArray;
    /**
     * Batch processing for multiple distribution comparisons
     *
     * @param referenceSamples Array of reference samples.
     * @param testSamples Array of test samples.
     *   NOTE(review): presumably paired with `referenceSamples` by index —
     *   behavior on length mismatch is not visible in this declaration.
     * @param alpha Optional; presumably the significance level applied to
     *   every comparison.
     * @returns One KS result per comparison.
     */
    batchKSTest(referenceSamples: number[][], testSamples: number[][], alpha?: number): KolmogorovSmirnovResult[];
    /**
     * Comprehensive distribution comparison report
     *
     * @param expected Values of the expected dataset.
     * @param actual Values of the actual dataset.
     * @param options Optional settings; every field is itself optional and
     *   the defaults are not visible in this declaration.
     * @returns Always contains `ks_test` and `summary`. `psi_analysis` and
     *   `wasserstein_distance` are optional.
     *   NOTE(review): presumably they are present only when `includePSI` /
     *   `includeWasserstein` are enabled — confirm.
     */
    compareDistributions(expected: number[], actual: number[], options?: {
        alpha?: number;
        bins?: number;
        includePSI?: boolean;
        includeWasserstein?: boolean;
    }): {
        ks_test: KolmogorovSmirnovResult;
        psi_analysis?: PopulationStabilityResult;
        wasserstein_distance?: number;
        summary: {
            drift_detected: boolean;
            severity: 'none' | 'low' | 'medium' | 'high';
            primary_indicator: string;
        };
    };
}
//# sourceMappingURL=statistical-tests.d.ts.map