UNPKG

datapilot-cli

Version:

Enterprise-grade streaming multi-format data analysis with comprehensive statistical insights and intelligent relationship detection - supports CSV, JSON, Excel, TSV, Parquet - memory-efficient, cross-platform

659 lines 24.6 kB
"use strict";
/**
 * Comprehensive Input Validation and Sanitization System
 * Prevents errors through strict input validation and safe defaults
 */
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
        desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    o["default"] = v;
});
var __importStar = (this && this.__importStar) || (function () {
    var ownKeys = function(o) {
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var result = {};
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        __setModuleDefault(result, mod);
        return result;
    };
})();
Object.defineProperty(exports, "__esModule", { value: true });
exports.InputValidator = void 0;
const fs_1 = require("fs");
const os = require("os");
const path = __importStar(require("path"));
const logger_1 = require("./logger");
const types_1 = require("../core/types");

/** Upper bound for `maxWorkers`; used for both the schema `max` and to clamp the default. */
const MAX_WORKERS = 32;

/** Matches ASCII control characters (0x00-0x1F) and DEL (0x7F). Only used with `String#replace`. */
const CONTROL_CHARS = /[\x00-\x1F\x7F]/g;

/**
 * Repeatedly applies `transform` until its output equals its input.
 *
 * A single removal pass can splice the text on either side of a removed
 * fragment into a new dangerous fragment (e.g. `<scr<script>ipt>` becomes
 * `<script>`), so sanitizers must run to a fixed point.
 *
 * @param {string} text - Initial text.
 * @param {(text: string) => string} transform - One sanitization pass. Callers
 *   in this file only pass transforms that shorten the string whenever they
 *   change it, which guarantees termination.
 * @returns {string} The first value `v` for which `transform(v) === v`.
 */
function applyUntilStable(text, transform) {
    let current = text;
    for (;;) {
        const next = transform(current);
        if (next === current) {
            return current;
        }
        current = next;
    }
}

/**
 * Escapes every regular-expression metacharacter in `text` so it can be
 * embedded literally in a dynamically built `RegExp`.
 *
 * @param {string} text - Raw text.
 * @returns {string} Text safe to interpolate into a pattern.
 */
function escapeRegExp(text) {
    return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * Static-only collection of validators and sanitizers.
 *
 * Validators return a result object of the shape
 * `{ isValid, errors, warnings, sanitizedValue }` where `errors` and
 * `warnings` are arrays of `{ field, message, severity, value, expectedType? }`.
 * Methods reference `this.SECURITY_PATTERNS` / `this.FILE_SIZE_LIMITS`, so they
 * must be invoked on the class (not as detached function references).
 */
class InputValidator {
    /** Size thresholds (in bytes) used by `validateFilePath`. */
    static FILE_SIZE_LIMITS = {
        maxFileSize: 10 * 1024 * 1024 * 1024, // 10GB
        maxMemoryFile: 100 * 1024 * 1024, // 100MB for in-memory processing
    };

    /**
     * Regular expressions used by the validators and sanitizers.
     *
     * Several patterns carry the `g` flag; inside this class they are only
     * used with `String#replace`, which does not depend on `lastIndex`.
     * `sqlInjection` and `commandInjection` are not referenced by any method
     * in this class.
     */
    static SECURITY_PATTERNS = {
        pathTraversal: /\.\.[\/\\]/,
        sqlInjection: /(union|select|insert|update|delete|drop|create|alter|exec|execute)/i,
        // Comprehensive HTML/Script injection patterns - more secure approach
        // NOTE: htmlTags is greedy - it matches from the first tag opening to the LAST '>' in the input.
        htmlTags: /<\/?[a-z][\s\S]*>/gi,
        scriptElements: /<script[\s\S]*?<\/script>/gi,
        dangerousElements: /<(script|iframe|object|embed|form|input|link|meta|style|base|applet)[\s\S]*?>/gi,
        eventHandlers: /\bon\w+\s*=\s*["']?[^"'>]*["']?/gi,
        javascriptProtocol: /javascript\s*:/gi,
        vbscriptProtocol: /vbscript\s*:/gi,
        dataUrls: /data\s*:\s*text\s*\/\s*(html|javascript)/gi,
        commandInjection: /[;&|`$(){}\[\]]/,
    };

    /**
     * Validate and sanitize file path.
     *
     * The path is trimmed, normalized, made absolute, and then `stat`-ed.
     * The traversal pattern is tested against the *normalized* path, so only
     * `..` segments that survive `path.normalize` are reported.
     *
     * @param {string} filePath - Path supplied by the caller.
     * @returns {Promise<object>} Validation result; `sanitizedValue` is the
     *   absolute path, or `null` when the input is not a non-empty string or
     *   an unexpected exception occurred.
     */
    static async validateFilePath(filePath) {
        const errors = [];
        const warnings = [];
        let sanitizedPath = filePath;
        try {
            // Basic validation
            if (!filePath || typeof filePath !== 'string') {
                errors.push({
                    field: 'filePath',
                    message: 'File path must be a non-empty string',
                    severity: 'error',
                    value: filePath,
                    expectedType: 'string',
                });
                return { isValid: false, errors, warnings, sanitizedValue: null };
            }
            // Sanitize path
            sanitizedPath = path.normalize(filePath.trim());
            // Security checks
            if (this.SECURITY_PATTERNS.pathTraversal.test(sanitizedPath)) {
                errors.push({
                    field: 'filePath',
                    message: 'Path traversal detected in file path',
                    severity: 'error',
                    value: filePath,
                });
            }
            // Check if path is absolute
            if (!path.isAbsolute(sanitizedPath)) {
                sanitizedPath = path.resolve(sanitizedPath);
                warnings.push({
                    field: 'filePath',
                    message: 'Relative path converted to absolute',
                    severity: 'warning',
                    value: filePath,
                });
            }
            // Check file existence and permissions
            try {
                const stats = await fs_1.promises.stat(sanitizedPath);
                if (!stats.isFile()) {
                    errors.push({
                        field: 'filePath',
                        message: 'Path does not point to a regular file',
                        severity: 'error',
                        value: sanitizedPath,
                    });
                }
                // Check file size
                if (stats.size > this.FILE_SIZE_LIMITS.maxFileSize) {
                    errors.push({
                        field: 'filePath',
                        message: `File size (${stats.size}) exceeds maximum limit (${this.FILE_SIZE_LIMITS.maxFileSize})`,
                        severity: 'error',
                        value: stats.size,
                    });
                }
                if (stats.size > this.FILE_SIZE_LIMITS.maxMemoryFile) {
                    warnings.push({
                        field: 'filePath',
                        message: 'File is large and will require streaming processing',
                        severity: 'warning',
                        value: stats.size,
                    });
                }
            }
            catch (fsError) {
                // File-system failures are reported as validation errors, not thrown.
                if (fsError.code === 'ENOENT') {
                    errors.push({
                        field: 'filePath',
                        message: 'File does not exist',
                        severity: 'error',
                        value: sanitizedPath,
                    });
                }
                else if (fsError.code === 'EACCES') {
                    errors.push({
                        field: 'filePath',
                        message: 'Permission denied accessing file',
                        severity: 'error',
                        value: sanitizedPath,
                    });
                }
                else {
                    errors.push({
                        field: 'filePath',
                        message: `File system error: ${fsError.message}`,
                        severity: 'error',
                        value: sanitizedPath,
                    });
                }
            }
            return {
                isValid: errors.length === 0,
                errors,
                warnings,
                sanitizedValue: sanitizedPath,
            };
        }
        catch (error) {
            errors.push({
                field: 'filePath',
                message: `Validation error: ${error.message}`,
                severity: 'error',
                value: filePath,
            });
            return { isValid: false, errors, warnings, sanitizedValue: null };
        }
    }

    /**
     * Validate worker pool configuration.
     *
     * @param {object} config - Candidate configuration; missing fields receive defaults.
     * @returns {object} Validation result from `validateObject`.
     */
    static validateWorkerPoolConfig(config) {
        // Clamp the CPU-derived default to the schema maximum; otherwise an empty
        // config would fail its own range check on hosts with many cores.
        const defaultWorkers = Math.min(MAX_WORKERS, Math.max(2, os.cpus().length - 1));
        const schema = {
            maxWorkers: {
                type: 'number',
                required: false,
                default: defaultWorkers,
                min: 1,
                max: MAX_WORKERS,
                rules: [
                    {
                        validate: (value) => Number.isInteger(value),
                        message: 'maxWorkers must be an integer',
                        severity: 'error',
                    },
                ],
            },
            memoryLimitMB: {
                type: 'number',
                required: false,
                default: 256,
                min: 64,
                max: 8192,
                rules: [
                    {
                        validate: (value) => value % 64 === 0,
                        message: 'memoryLimitMB should be a multiple of 64',
                        severity: 'warning',
                    },
                ],
            },
            taskTimeout: {
                type: 'number',
                required: false,
                default: 30000,
                min: 1000,
                max: 300000,
            },
            enableMemoryMonitoring: {
                type: 'boolean',
                required: false,
                default: true,
            },
        };
        return this.validateObject(config, schema, 'workerPoolConfig');
    }

    /**
     * Validate streaming configuration.
     *
     * @param {object} config - Candidate configuration; missing fields receive defaults.
     * @returns {object} Validation result from `validateObject`.
     */
    static validateStreamingConfig(config) {
        const schema = {
            chunkSize: {
                type: 'number',
                required: false,
                default: 64 * 1024,
                min: 1024,
                max: 64 * 1024 * 1024,
                rules: [
                    {
                        // Bitwise operators truncate to int32, so a fractional value
                        // must be rejected explicitly before the power-of-two test.
                        validate: (value) => Number.isInteger(value) && (value & (value - 1)) === 0,
                        message: 'chunkSize should be a power of 2 for optimal performance',
                        severity: 'warning',
                    },
                ],
            },
            memoryThresholdMB: {
                type: 'number',
                required: false,
                default: 512,
                min: 64,
                max: 16384,
            },
            maxRowsAnalyzed: {
                type: 'number',
                required: false,
                default: 1000000,
                min: 1000,
                max: 100000000,
            },
            enableAdaptiveStreaming: {
                type: 'boolean',
                required: false,
                default: true,
            },
        };
        return this.validateObject(config, schema, 'streamingConfig');
    }

    /**
     * Validate buffer data.
     *
     * @param {*} buffer - Value expected to be a `Buffer`.
     * @param {number} [maxSize] - Optional maximum length in bytes; a falsy value disables the check.
     * @returns {object} Validation result; an empty buffer yields a warning, not an error.
     */
    static validateBuffer(buffer, maxSize) {
        const errors = [];
        const warnings = [];
        if (!Buffer.isBuffer(buffer)) {
            errors.push({
                field: 'buffer',
                message: 'Value is not a valid Buffer',
                severity: 'error',
                value: typeof buffer,
                expectedType: 'Buffer',
            });
            return { isValid: false, errors, warnings, sanitizedValue: null };
        }
        if (maxSize && buffer.length > maxSize) {
            errors.push({
                field: 'buffer',
                message: `Buffer size (${buffer.length}) exceeds maximum (${maxSize})`,
                severity: 'error',
                value: buffer.length,
            });
        }
        if (buffer.length === 0) {
            warnings.push({
                field: 'buffer',
                message: 'Buffer is empty',
                severity: 'warning',
                value: buffer.length,
            });
        }
        return {
            isValid: errors.length === 0,
            errors,
            warnings,
            sanitizedValue: buffer,
        };
    }

    /**
     * Validate and sanitize CSV parsing options.
     *
     * @param {object} options - Candidate options; missing fields receive defaults.
     * @returns {object} Validation result from `validateObject`.
     */
    static validateCSVOptions(options) {
        const schema = {
            delimiter: {
                type: 'string',
                required: false,
                default: ',',
                rules: [
                    {
                        validate: (value) => value.length === 1,
                        message: 'Delimiter must be a single character',
                        severity: 'error',
                    },
                ],
            },
            quote: {
                type: 'string',
                required: false,
                default: '"',
                rules: [
                    {
                        validate: (value) => value.length === 1,
                        message: 'Quote character must be a single character',
                        severity: 'error',
                    },
                ],
            },
            encoding: {
                type: 'string',
                required: false,
                default: 'utf8',
                enum: ['utf8', 'ascii', 'latin1', 'utf16le', 'base64', 'hex'],
            },
            hasHeader: {
                type: 'boolean',
                required: false,
                default: true,
            },
            maxRows: {
                type: 'number',
                required: false,
                default: 1000000,
                min: 1,
                max: 100000000,
            },
        };
        return this.validateObject(options, schema, 'csvOptions');
    }

    /**
     * Validate numeric array data.
     *
     * Finite numbers are kept, strings are parsed with `parseFloat` (so a
     * numeric prefix such as `"12abc"` is accepted as `12`), `null`/`undefined`
     * entries are skipped silently, and everything else counts as invalid.
     *
     * @param {*} data - Value expected to be an array.
     * @param {string} [fieldName='data'] - Name used in reported issues.
     * @returns {object} Validation result; `sanitizedValue` is the array of
     *   accepted numbers (or `null` when `data` is not an array). Whenever
     *   `isValid` is `false`, `errors` contains at least one entry.
     */
    static validateNumericArray(data, fieldName = 'data') {
        const errors = [];
        const warnings = [];
        if (!Array.isArray(data)) {
            errors.push({
                field: fieldName,
                message: 'Value must be an array',
                severity: 'error',
                value: typeof data,
                expectedType: 'array',
            });
            return { isValid: false, errors, warnings, sanitizedValue: null };
        }
        if (data.length === 0) {
            warnings.push({
                field: fieldName,
                message: 'Array is empty',
                severity: 'warning',
                value: data.length,
            });
            return { isValid: true, errors, warnings, sanitizedValue: [] };
        }
        // Validate and sanitize numeric values
        const sanitizedArray = [];
        let invalidCount = 0;
        for (const value of data) {
            if (value === null || value === undefined) {
                // Skip null/undefined values
                continue;
            }
            if (typeof value === 'number' && Number.isFinite(value)) {
                sanitizedArray.push(value);
            }
            else if (typeof value === 'string') {
                const parsed = parseFloat(value);
                if (Number.isFinite(parsed)) {
                    sanitizedArray.push(parsed);
                }
                else {
                    invalidCount++;
                }
            }
            else {
                invalidCount++;
            }
        }
        if (invalidCount > 0) {
            const invalidRatio = invalidCount / data.length;
            if (invalidRatio > 0.5) {
                errors.push({
                    field: fieldName,
                    message: `Too many invalid numeric values (${invalidCount}/${data.length})`,
                    severity: 'error',
                    value: invalidCount,
                });
            }
            else {
                warnings.push({
                    field: fieldName,
                    message: `Skipped ${invalidCount} invalid numeric values`,
                    severity: 'warning',
                    value: invalidCount,
                });
            }
        }
        // An invalid result must always explain itself: without this, an array of
        // only null/undefined entries was reported invalid with no error attached.
        if (sanitizedArray.length === 0 && errors.length === 0) {
            errors.push({
                field: fieldName,
                message: 'Array contains no valid numeric values',
                severity: 'error',
                value: data.length,
            });
        }
        return {
            isValid: errors.length === 0 && sanitizedArray.length > 0,
            errors,
            warnings,
            sanitizedValue: sanitizedArray,
        };
    }

    /**
     * Generic object validation against schema.
     *
     * For each schema field, in order: apply default / required handling, then
     * type, numeric range, string pattern, enum, and custom rules. A failure in
     * any step before the custom rules skips the remaining steps for that field.
     * Properties of `obj` that are not in `schema` are not copied to the result.
     *
     * @param {object} obj - Object to validate.
     * @param {object} schema - Map of field name to field schema
     *   (`type`, `required`, `default`, `min`, `max`, `pattern`, `enum`, `rules`).
     * @param {string} [objectName='object'] - Prefix used in reported field paths.
     * @returns {object} Validation result; `sanitizedValue` is `null` if any error was recorded.
     */
    static validateObject(obj, schema, objectName = 'object') {
        const errors = [];
        const warnings = [];
        const sanitizedValue = {};
        if (typeof obj !== 'object' || obj === null) {
            errors.push({
                field: objectName,
                message: 'Value must be an object',
                severity: 'error',
                value: typeof obj,
                expectedType: 'object',
            });
            return { isValid: false, errors, warnings, sanitizedValue: null };
        }
        // Validate each field in schema
        for (const [fieldName, fieldSchema] of Object.entries(schema)) {
            const fieldPath = `${objectName}.${fieldName}`;
            let value = obj[fieldName];
            // Handle missing required fields
            if (value === undefined || value === null) {
                if (fieldSchema.required) {
                    errors.push({
                        field: fieldPath,
                        message: `Required field '${fieldName}' is missing`,
                        severity: 'error',
                        value: value,
                    });
                    continue;
                }
                else if (fieldSchema.default !== undefined) {
                    value = fieldSchema.default;
                }
                else {
                    continue; // Skip optional fields without defaults
                }
            }
            // Type validation
            if (!this.validateType(value, fieldSchema.type)) {
                errors.push({
                    field: fieldPath,
                    message: `Field '${fieldName}' must be of type ${fieldSchema.type}`,
                    severity: 'error',
                    value: typeof value,
                    expectedType: fieldSchema.type,
                });
                continue;
            }
            // Range validation for numbers
            if (fieldSchema.type === 'number') {
                if (fieldSchema.min !== undefined && value < fieldSchema.min) {
                    errors.push({
                        field: fieldPath,
                        message: `Field '${fieldName}' must be >= ${fieldSchema.min}`,
                        severity: 'error',
                        value: value,
                    });
                    continue;
                }
                if (fieldSchema.max !== undefined && value > fieldSchema.max) {
                    errors.push({
                        field: fieldPath,
                        message: `Field '${fieldName}' must be <= ${fieldSchema.max}`,
                        severity: 'error',
                        value: value,
                    });
                    continue;
                }
            }
            // Pattern validation for strings
            if (fieldSchema.type === 'string' && fieldSchema.pattern) {
                if (!fieldSchema.pattern.test(value)) {
                    errors.push({
                        field: fieldPath,
                        message: `Field '${fieldName}' does not match required pattern`,
                        severity: 'error',
                        value: value,
                    });
                    continue;
                }
            }
            // Enum validation
            if (fieldSchema.enum && !fieldSchema.enum.includes(value)) {
                errors.push({
                    field: fieldPath,
                    message: `Field '${fieldName}' must be one of: ${fieldSchema.enum.join(', ')}`,
                    severity: 'error',
                    value: value,
                });
                continue;
            }
            // Custom rule validation
            if (fieldSchema.rules) {
                let sanitizedFieldValue = value;
                for (const rule of fieldSchema.rules) {
                    // Rules are always checked against the original value, while
                    // sanitizers are chained on the progressively sanitized value.
                    if (!rule.validate(value)) {
                        if (rule.severity === 'error') {
                            errors.push({
                                field: fieldPath,
                                message: rule.message,
                                severity: 'error',
                                value: value,
                            });
                        }
                        else {
                            warnings.push({
                                field: fieldPath,
                                message: rule.message,
                                severity: 'warning',
                                value: value,
                            });
                        }
                    }
                    // Apply sanitization if available
                    if (rule.sanitize) {
                        sanitizedFieldValue = rule.sanitize(sanitizedFieldValue);
                    }
                }
                value = sanitizedFieldValue;
            }
            sanitizedValue[fieldName] = value;
        }
        return {
            isValid: errors.length === 0,
            errors,
            warnings,
            sanitizedValue: errors.length === 0 ? sanitizedValue : null,
        };
    }

    /**
     * Type validation helper.
     *
     * @param {*} value - Value to check.
     * @param {string} expectedType - One of `string`, `number`, `boolean`,
     *   `array`, `object`, `function`, `buffer`.
     * @returns {boolean} `true` when `value` matches; `number` requires a finite
     *   number, `object` excludes `null` and arrays, unknown type names yield `false`.
     */
    static validateType(value, expectedType) {
        switch (expectedType) {
            case 'string':
                return typeof value === 'string';
            case 'number':
                return typeof value === 'number' && Number.isFinite(value);
            case 'boolean':
                return typeof value === 'boolean';
            case 'array':
                return Array.isArray(value);
            case 'object':
                return typeof value === 'object' && value !== null && !Array.isArray(value);
            case 'function':
                return typeof value === 'function';
            case 'buffer':
                return Buffer.isBuffer(value);
            default:
                return false;
        }
    }

    /**
     * Security sanitization for string inputs.
     *
     * Removes dangerous elements, script blocks, inline event handlers,
     * `javascript:` / `vbscript:` / `data:text/html|javascript` prefixes and
     * control characters. The removal pass is repeated until the text no
     * longer changes, so fragments cannot recombine into a new match
     * (e.g. `<scr<script>ipt ...>` or `java\x00script:`).
     *
     * @param {*} input - Value to sanitize.
     * @returns {string} Sanitized text, trimmed and limited to 10000 characters;
     *   `''` when `input` is not a string.
     */
    static sanitizeString(input) {
        if (typeof input !== 'string') {
            return '';
        }
        const patterns = this.SECURITY_PATTERNS;
        // Every replacement is the empty string, so each changing pass shortens the text.
        const stripOnce = (text) => text
            // Remove dangerous HTML elements and script tags
            .replace(patterns.dangerousElements, '')
            .replace(patterns.scriptElements, '')
            // Remove event handlers (onclick, onload, etc.)
            .replace(patterns.eventHandlers, '')
            // Remove dangerous protocols
            .replace(patterns.javascriptProtocol, '')
            .replace(patterns.vbscriptProtocol, '')
            // Remove data URLs that could contain HTML/JS
            .replace(patterns.dataUrls, '')
            // Remove control characters and null bytes
            .replace(CONTROL_CHARS, '');
        return applyUntilStable(input, stripOnce)
            .trim()
            .slice(0, 10000); // Limit length
    }

    /**
     * Secure HTML sanitization for content that may contain HTML.
     *
     * With no allowed tags, everything matched by the (greedy) `htmlTags`
     * pattern is removed. Otherwise dangerous elements and event handlers are
     * removed, dangerous protocols are replaced by `removed:`, and every tag
     * whose name is not in `allowedTags` is stripped. Each mode repeats its
     * pass until the text is stable.
     *
     * @param {*} input - Value to sanitize.
     * @param {string[]} [allowedTags=[]] - Tag names to keep (matched
     *   case-insensitively). Names are escaped before being placed in a
     *   `RegExp`; a non-array value is treated as "no tags allowed".
     * @returns {string} Sanitized text, trimmed; limited to 50000 characters
     *   when tags are allowed. `''` when `input` is not a string.
     */
    static sanitizeHTML(input, allowedTags = []) {
        if (typeof input !== 'string') {
            return '';
        }
        const patterns = this.SECURITY_PATTERNS;
        // Escape tag names so caller-supplied text cannot alter the generated pattern.
        const tagNames = Array.isArray(allowedTags)
            ? allowedTags.map((tag) => escapeRegExp(String(tag)))
            : [];
        // First pass: Remove all HTML if no tags are allowed
        if (tagNames.length === 0) {
            const stripAll = (text) => text
                .replace(patterns.htmlTags, '')
                .replace(CONTROL_CHARS, '');
            return applyUntilStable(input, stripAll).trim();
        }
        // Matches any tag whose name is not one of the allowed names.
        const disallowedTag = new RegExp(`<(?!/?(?:${tagNames.join('|')})(?:\\s|>))[^>]*>`, 'gi');
        // 'removed:' is shorter than every protocol match it replaces, so each
        // changing pass still shortens the text and the loop terminates.
        const stripOnce = (text) => text
            // Remove dangerous elements completely
            .replace(patterns.dangerousElements, '')
            // Remove all event handlers
            .replace(patterns.eventHandlers, '')
            // Remove dangerous protocols
            .replace(patterns.javascriptProtocol, 'removed:')
            .replace(patterns.vbscriptProtocol, 'removed:')
            // Remove dangerous data URLs
            .replace(patterns.dataUrls, 'removed:')
            // Only allow specified tags
            .replace(disallowedTag, '')
            // Remove control characters and null bytes
            .replace(CONTROL_CHARS, '');
        return applyUntilStable(input, stripOnce)
            .trim()
            .slice(0, 50000); // Larger limit for HTML content
    }

    /**
     * Validate and throw on validation errors.
     *
     * @param {object} result - A validation result produced by one of the validators.
     * @param {string} operation - Operation name included in the error / log message.
     * @returns {*} `result.sanitizedValue` when the result is valid.
     * @throws {DataPilotError} When `result.isValid` is `false`; the message
     *   joins every `field: message` pair from `result.errors`.
     */
    static validateAndThrow(result, operation) {
        if (!result.isValid) {
            const errorMessages = result.errors.map((e) => `${e.field}: ${e.message}`).join('; ');
            throw new types_1.DataPilotError(`Validation failed for ${operation}: ${errorMessages}`, 'VALIDATION_ERROR', types_1.ErrorSeverity.HIGH, types_1.ErrorCategory.VALIDATION);
        }
        // Log warnings
        if (result.warnings.length > 0) {
            const warningMessages = result.warnings.map((w) => `${w.field}: ${w.message}`).join('; ');
            logger_1.logger.warn(`Validation warnings for ${operation}: ${warningMessages}`);
        }
        return result.sanitizedValue;
    }
}
exports.InputValidator = InputValidator;
//# sourceMappingURL=input-validator.js.map