/*
 * datapilot-cli
 * Version:
 * Enterprise-grade streaming multi-format data analysis with comprehensive statistical insights and intelligent relationship detection - supports CSV, JSON, Excel, TSV, Parquet - memory-efficient, cross-platform
 * 30 lines • 1.26 kB
 * TypeScript
 */
/**
* Shared Data Quality Utilities
* Provides consistent calculations across all sections to prevent inter-section discrepancies
*/
export interface UniquenessResult {
uniqueCount: number;
uniquePercentage: number;
duplicateCount: number;
totalNonNullValues: number;
}
/**
* Standardized uniqueness calculation used across all sections
* Fixes inter-section consistency bug where Section 2 and Section 4 report different statistics
*/
export declare function calculateUniqueness(data: (string | null | undefined)[][], columnIndex: number, normalizeValue?: (value: string | null | undefined) => string | null): UniquenessResult;
/**
* Default value normalization - consistent across sections
*/
export declare function defaultNormalizeValue(value: string | null | undefined): string | null;
/**
* Calculate uniqueness for multiple columns efficiently
*/
export declare function calculateColumnUniqueness(data: (string | null | undefined)[][], headers: string[], normalizeValue?: (value: string | null | undefined) => string | null): Array<{
columnName: string;
uniqueCount: number;
uniquePercentage: number;
duplicateCount: number;
totalNonNullValues: number;
}>;
//# sourceMappingURL=data-quality-utils.d.ts.map