UNPKG

datapilot-cli

Version:

Enterprise-grade streaming multi-format data analysis with comprehensive statistical insights and intelligent relationship detection - supports CSV, JSON, Excel, TSV, Parquet - memory-efficient, cross-platform

61 lines 2.23 kB
"use strict"; /** * Shared Data Quality Utilities * Provides consistent calculations across all sections to prevent inter-section discrepancies */ Object.defineProperty(exports, "__esModule", { value: true }); exports.calculateUniqueness = calculateUniqueness; exports.defaultNormalizeValue = defaultNormalizeValue; exports.calculateColumnUniqueness = calculateColumnUniqueness; /** * Standardized uniqueness calculation used across all sections * Fixes inter-section consistency bug where Section 2 and Section 4 report different statistics */ function calculateUniqueness(data, columnIndex, normalizeValue) { const valueMap = new Map(); let nonNullCount = 0; // Default normalization function if none provided const normalize = normalizeValue || defaultNormalizeValue; for (let rowIdx = 0; rowIdx < data.length; rowIdx++) { const value = normalize(data[rowIdx]?.[columnIndex]); if (value !== null) { nonNullCount++; const key = String(value); valueMap.set(key, (valueMap.get(key) || 0) + 1); } } const uniqueCount = valueMap.size; const uniquePercentage = nonNullCount > 0 ? (uniqueCount / nonNullCount) * 100 : 0; const duplicateCount = nonNullCount - uniqueCount; return { uniqueCount, uniquePercentage: Number(uniquePercentage.toFixed(2)), // Consistent rounding to 2 decimal places duplicateCount, totalNonNullValues: nonNullCount, }; } /** * Default value normalization - consistent across sections */ function defaultNormalizeValue(value) { if (value === null || value === undefined) return null; const trimmed = String(value).trim(); if (trimmed === '' || trimmed.toLowerCase() === 'null' || trimmed.toLowerCase() === 'na') { return null; } return trimmed; } /** * Calculate uniqueness for multiple columns efficiently */ function calculateColumnUniqueness(data, headers, normalizeValue) { return headers.map((columnName, colIdx) => { const result = calculateUniqueness(data, colIdx, normalizeValue); return { columnName, ...result, }; }); } //# sourceMappingURL=data-quality-utils.js.map