UNPKG

@flightstream/utils-arrow

Version:

Advanced utilities for working with Arrow data and Flight protocol in FlightStream for Node.js

160 lines (137 loc) 4.35 kB
/** * TypeDetector - Encapsulates all logic for inferring types from sample values * * This class provides methods to analyze sample data and determine the most * appropriate data type. It handles boolean, numeric, date, timestamp, and * string detection with configurable options. */ export class TypeDetector { constructor(options = {}) { this.options = { strictMode: false, dateFormats: ['YYYY-MM-DD', 'YYYY-MM-DD HH:mm:ss'], integerThreshold: Number.MAX_SAFE_INTEGER, ...options }; } /** * Infer data type from a sample value * @param {*} value - Sample value to analyze * @param {Object} options - Type inference options (overrides constructor options) * @returns {string} Inferred type name */ inferType(value, options = {}) { const mergedOptions = { ...this.options, ...options }; if (value === null || value === undefined || value === '') { return 'string'; // default to string for null/empty values } const strValue = String(value).trim(); // Boolean detection if (this.isBooleanValue(strValue)) { return 'boolean'; } // Numeric detection const numericType = this.inferNumericType(strValue, mergedOptions); if (numericType) { return numericType; } // Date detection if (this.isDateValue(strValue)) { return 'date'; } // Timestamp detection if (this.isTimestampValue(strValue)) { return 'timestamp'; } // Default to string return 'string'; } /** * Check if a value represents a boolean * @param {string} value - String value to check * @returns {boolean} */ isBooleanValue(value) { const lowerValue = value.toLowerCase(); return ['true', 'false', 'yes', 'no', '1', '0', 'y', 'n'].includes(lowerValue); } /** * Infer numeric type from string value * @param {string} value - String value to analyze * @param {Object} options - Numeric inference options * @returns {string|null} Numeric type or null if not numeric */ inferNumericType(value, options = {}) { const { strictMode = false, integerThreshold = Number.MAX_SAFE_INTEGER } = options; // Integer detection if (/^-?\d+$/.test(value)) { const intValue = parseInt(value, 10); if (Math.abs(intValue) <= integerThreshold) { return 'int64'; } else { return 'string'; // Too large for safe integer } } // Float detection if (/^-?\d*\.\d+$/.test(value) || /^-?\d+\.?\d*[eE][+-]?\d+$/.test(value)) { return 'float64'; } // Percentage if (/^-?\d*\.?\d+%$/.test(value)) { return strictMode ? 'string' : 'float64'; } // Currency (simple detection) if (/^[$€£¥]\d+\.?\d*$/.test(value)) { return strictMode ? 'string' : 'float64'; } return null; } /** * Check if a value represents a date * @param {string} value - String value to check * @returns {boolean} */ isDateValue(value) { // Simple date patterns const datePatterns = [ /^\d{4}-\d{2}-\d{2}$/, // YYYY-MM-DD /^\d{2}\/\d{2}\/\d{4}$/, // MM/DD/YYYY /^\d{2}-\d{2}-\d{4}$/, // MM-DD-YYYY /^\d{4}\/\d{2}\/\d{2}$/, // YYYY/MM/DD ]; // Check against patterns for (const pattern of datePatterns) { if (pattern.test(value)) { // Validate it's actually a valid date const date = new Date(value); return !isNaN(date.getTime()); } } return false; } /** * Check if a value represents a timestamp * @param {string} value - String value to check * @returns {boolean} */ isTimestampValue(value) { // ISO timestamp if (/^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}/.test(value)) { const date = new Date(value); return !isNaN(date.getTime()); } // Unix timestamp (seconds) if (/^\d{10}$/.test(value)) { const timestamp = parseInt(value, 10); // Check if it's a reasonable timestamp (between 1970 and 2050) return timestamp > 0 && timestamp < 2524608000; } // Unix timestamp (milliseconds) if (/^\d{13}$/.test(value)) { const timestamp = parseInt(value, 10); // Check if it's a reasonable timestamp return timestamp > 0 && timestamp < 2524608000000; } return false; } }