datapilot-cli
Version:
Enterprise-grade streaming multi-format data analysis with comprehensive statistical insights and intelligent relationship detection - supports CSV, JSON, Excel, TSV, Parquet - memory-efficient, cross-platform
290 lines • 10.1 kB
JavaScript
;
/**
* Input Validation and Sanitization
* Security layer for file paths and CSV content
*/
// Set the `__esModule` flag on the exports object (the marker conventionally
// used by ES-module interop helpers to recognise transpiled modules).
Object.defineProperty(exports, "__esModule", { value: true });
// Pre-declare the exported names as undefined; the real values are assigned
// further down, after the corresponding classes/instances are created.
exports.ExternalDataValidator = exports.globalInputValidator = exports.InputValidator = void 0;
// `getInputValidator` is a function declaration, so it is hoisted and can be
// exported here before its definition appears in the file.
exports.getInputValidator = getInputValidator;
const path_1 = require("path");
/**
 * Baseline security policy merged underneath any caller-supplied overrides
 * when an InputValidator is constructed.
 */
const DEFAULT_SECURITY_CONFIG = {
    // Lower-case extensions (leading dot included) a file path may end with.
    allowedExtensions: ['.csv', '.tsv', '.txt'],
    // Size ceiling in bytes: 10 GiB.
    maxFileSize: 10 * 2 ** 30,
    // Longest accepted raw path string, in characters.
    maxPathLength: 1000,
    // Directories a resolved path must live under. An empty list means the
    // directory-containment check is skipped entirely.
    allowedDirectories: [],
    // Any match against the raw input path is reported as a validation error.
    blockedPatterns: [
        /\.\./, // parent-directory traversal
        /[<>:"|?*]/, // characters that are invalid in Windows file names
        /[\x00-\x1f]/, // ASCII control characters
        /^\s|\s$/, // whitespace at either end of the path
    ],
};
/**
 * Validates and sanitizes untrusted input: file paths, CSV headers/fields,
 * configuration objects and CLI options.
 *
 * Every `validate*` method returns a result object of the shape
 * `{ isValid, valid, errors, warnings }` (`valid` mirrors `isValid`), plus
 * `sanitizedValue` / `sanitized` (same value under two names) whenever the
 * method produced a cleaned-up value. Problems are reported through the result
 * object rather than by throwing.
 */
class InputValidator {
    /**
     * Option/config keys (compared lower-cased) that are treated as dangerous:
     * flagged with a warning by validateConfig, rejected by validateCLIInput.
     */
    static #DANGEROUS_KEYS = new Set(['eval', 'function', 'require', 'import', '__proto__', 'constructor']);
    /** Effective security policy: defaults overlaid with constructor overrides. */
    config;
    /**
     * @param {object} [config] - Partial overrides for DEFAULT_SECURITY_CONFIG.
     *   The merge is shallow, so an overriding array replaces the default one.
     */
    constructor(config = {}) {
        this.config = { ...DEFAULT_SECURITY_CONFIG, ...config };
    }
    /** Build a result that carries no sanitized value. */
    static #result(errors, warnings) {
        const isValid = errors.length === 0;
        return { isValid, valid: isValid, errors, warnings };
    }
    /** Build a result that exposes `value` as both `sanitizedValue` and `sanitized`. */
    static #resultWithValue(errors, warnings, value) {
        const isValid = errors.length === 0;
        return {
            isValid,
            valid: isValid,
            sanitizedValue: value,
            sanitized: value,
            errors,
            warnings,
        };
    }
    /**
     * Validate and sanitize a file path.
     *
     * Checks, in order: presence/type, maximum length, blocked patterns (tested
     * against the raw input), allowed extension, and - only when
     * `allowedDirectories` is non-empty - containment in one of those
     * directories. The check works on the path string only; nothing here reads
     * the filesystem, so existence and size are not verified.
     *
     * @param {string} inputPath - Path as supplied by the caller.
     * @returns {object} Validation result; `sanitizedValue` is the normalized,
     *   absolute path (present unless the input was missing/non-string or
     *   resolution threw).
     */
    validateFilePath(inputPath) {
        const errors = [];
        const warnings = [];
        if (!inputPath || typeof inputPath !== 'string') {
            errors.push('File path is required and must be a string');
            return InputValidator.#result(errors, warnings);
        }
        if (inputPath.length > this.config.maxPathLength) {
            errors.push(`File path too long (max ${this.config.maxPathLength} characters)`);
        }
        for (const pattern of this.config.blockedPatterns) {
            // A global/sticky RegExp remembers `lastIndex` between `.test()` calls,
            // which would let a blocked path slip through on every other call.
            if (pattern.global || pattern.sticky) {
                pattern.lastIndex = 0;
            }
            if (pattern.test(inputPath)) {
                errors.push(`File path contains invalid pattern: ${pattern.source}`);
            }
        }
        try {
            const resolvedPath = path_1.resolve(path_1.normalize(inputPath));
            const extension = this.getFileExtension(resolvedPath);
            if (!this.config.allowedExtensions.includes(extension)) {
                errors.push(`File extension '${extension}' not allowed. Allowed: ${this.config.allowedExtensions.join(', ')}`);
            }
            if (this.config.allowedDirectories.length > 0) {
                const isAllowed = this.config.allowedDirectories.some((allowedDir) => {
                    // Compare on path-segment boundaries: a plain string-prefix test
                    // would accept "/data-evil/x.csv" for the allowed dir "/data".
                    const relativePath = path_1.relative(path_1.resolve(allowedDir), resolvedPath);
                    const escapesBase = relativePath === '..'
                        || relativePath.startsWith(`..${path_1.sep}`)
                        // Absolute result: no relative route exists (e.g. another Windows drive).
                        || path_1.isAbsolute(relativePath);
                    return !escapesBase;
                });
                if (!isAllowed) {
                    errors.push('File path outside allowed directories');
                }
            }
            return InputValidator.#resultWithValue(errors, warnings, resolvedPath);
        }
        catch (error) {
            errors.push(`Path resolution failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
            return InputValidator.#result(errors, warnings);
        }
    }
    /**
     * Validate a raw CSV header line.
     *
     * Control characters (tab, LF and CR excepted) are errors; an over-long
     * header and script-like content only produce warnings.
     *
     * @param {string} header - Raw header line.
     * @returns {object} Validation result; `sanitizedValue` is the trimmed header.
     */
    validateCSVHeader(header) {
        const errors = [];
        const warnings = [];
        if (!header || typeof header !== 'string') {
            errors.push('CSV header is required');
            return InputValidator.#result(errors, warnings);
        }
        if (/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/.test(header)) {
            errors.push('CSV header contains control characters');
        }
        if (header.length > 10000) {
            warnings.push('CSV header is very long (>10KB)');
        }
        const suspiciousPatterns = [
            /javascript:/i,
            /data:.*base64/i,
            /<script/i,
            /eval\s*\(/i,
            /function\s*\(/i,
        ];
        for (const pattern of suspiciousPatterns) {
            if (pattern.test(header)) {
                warnings.push(`CSV header contains suspicious pattern: ${pattern.source}`);
            }
        }
        return InputValidator.#resultWithValue(errors, warnings, header.trim());
    }
    /**
     * Validate a single CSV field.
     *
     * @param {*} field - Field value; falsy values pass untouched.
     * @param {number} [maxLength=1000] - Maximum accepted `length`.
     * @returns {object} Validation result; `sanitizedValue` is `field` unchanged.
     */
    validateCSVField(field, maxLength = 1000) {
        const errors = [];
        const warnings = [];
        if (field && field.length > maxLength) {
            errors.push(`Field too long (max ${maxLength} characters)`);
        }
        // Only strings can contain a NUL character; guarding on the type keeps
        // numeric or other non-string cells from raising a TypeError here.
        if (typeof field === 'string' && field.includes('\x00')) {
            errors.push('Field contains null bytes');
        }
        return InputValidator.#resultWithValue(errors, warnings, field);
    }
    /**
     * Inspect a configuration object for dangerous-looking keys.
     *
     * Walks the object recursively and adds a warning (never an error) for every
     * key whose lower-cased name is in the dangerous-key list. Objects already on
     * the current traversal path are not re-entered, so cyclic structures
     * terminate.
     *
     * @param {object} config - Configuration to inspect.
     * @returns {object} Validation result; `sanitizedValue` is `config` itself (not a copy).
     */
    validateConfig(config) {
        const errors = [];
        const warnings = [];
        if (!config || typeof config !== 'object') {
            errors.push('Configuration must be an object');
            return InputValidator.#result(errors, warnings);
        }
        // Tracks only the ancestors of the node being visited, so an object that
        // is merely shared by two branches is still reported under both paths.
        const ancestors = new Set();
        const checkObject = (obj, path = '') => {
            ancestors.add(obj);
            for (const key in obj) {
                const fullPath = path ? `${path}.${key}` : key;
                if (InputValidator.#DANGEROUS_KEYS.has(key.toLowerCase())) {
                    warnings.push(`Potentially dangerous configuration key: ${fullPath}`);
                }
                const child = obj[key];
                if (typeof child === 'object' && child !== null && !ancestors.has(child)) {
                    checkObject(child, fullPath);
                }
            }
            ancestors.delete(obj);
        };
        checkObject(config);
        return InputValidator.#resultWithValue(errors, warnings, config);
    }
    /**
     * Validate CLI input options.
     *
     * Options with a dangerous key, or a string value containing `<script`,
     * `javascript:` or `data:` (in any letter case), are reported as errors and
     * left out of the sanitized copy. Remaining string values are trimmed;
     * non-string values are copied as-is.
     *
     * @param {object} options - Parsed CLI options.
     * @param {*} [context] - Accepted for interface compatibility; not used.
     * @returns {object} Validation result; `sanitizedValue` is the filtered copy.
     */
    validateCLIInput(options, context) {
        const errors = [];
        const warnings = [];
        if (!options || typeof options !== 'object') {
            errors.push('CLI options must be an object');
            return InputValidator.#result(errors, warnings);
        }
        const sanitizedOptions = {};
        for (const [key, value] of Object.entries(options)) {
            if (InputValidator.#DANGEROUS_KEYS.has(key.toLowerCase())) {
                errors.push(`Dangerous CLI option key: ${key}`);
                continue;
            }
            if (typeof value !== 'string') {
                sanitizedOptions[key] = value;
                continue;
            }
            // Case-insensitive so that "JavaScript:" or "<SCRIPT" cannot bypass the check.
            if (/<script|javascript:|data:/i.test(value)) {
                errors.push(`Potentially dangerous value in CLI option: ${key}`);
                continue;
            }
            sanitizedOptions[key] = value.trim();
        }
        return InputValidator.#resultWithValue(errors, warnings, sanitizedOptions);
    }
    /**
     * Run every applicable validator over a combined input object.
     *
     * Only truthy members are validated: `filePath`, `config`, `csvHeader`.
     *
     * @param {object} input - Object with any of `filePath`, `config`, `csvHeader`.
     * @returns {object} Aggregated result (`errors` as strings); carries no sanitized value.
     */
    validateInput(input) {
        const allErrors = [];
        const allWarnings = [];
        if (!input || typeof input !== 'object') {
            allErrors.push('Input must be an object');
            return InputValidator.#result(allErrors, allWarnings);
        }
        const collect = (result) => {
            allErrors.push(...result.errors.map((e) => (typeof e === 'string' ? e : e.message)));
            allWarnings.push(...result.warnings);
        };
        if (input.filePath) {
            collect(this.validateFilePath(input.filePath));
        }
        if (input.config) {
            collect(this.validateConfig(input.config));
        }
        if (input.csvHeader) {
            collect(this.validateCSVHeader(input.csvHeader));
        }
        return InputValidator.#result(allErrors, allWarnings);
    }
    /**
     * Lower-cased extension (leading dot included) of the final path segment,
     * or '' when that segment contains no dot.
     *
     * Only the file name is searched, so a dot in a directory name such as
     * "/srv/data.v2/report" is not mistaken for an extension. A dot-file like
     * ".csv" still yields ".csv".
     *
     * @param {string} filePath
     * @returns {string}
     */
    getFileExtension(filePath) {
        const fileName = path_1.basename(filePath);
        const lastDot = fileName.lastIndexOf('.');
        return lastDot === -1 ? '' : fileName.substring(lastDot).toLowerCase();
    }
}
// Publish the InputValidator class on the module's exports object.
exports.InputValidator = InputValidator;
// Global validator instance: constructed once, when this module is loaded,
// with no configuration overrides (i.e. the default security config).
exports.globalInputValidator = new InputValidator();
/**
 * Accessor for the module-wide validator.
 *
 * The value is read from `exports.globalInputValidator` on every call, so the
 * caller receives whatever instance is currently stored there.
 *
 * @returns {InputValidator} The shared validator instance.
 */
function getInputValidator() {
    const { globalInputValidator: sharedValidator } = exports;
    return sharedValidator;
}
/**
 * Facade for checking data that arrives from outside the application.
 * It owns a private InputValidator built with that class's default settings.
 */
class ExternalDataValidator {
    // Created for each instance at construction time, with no config overrides.
    validator = new InputValidator();
    /**
     * Check an external data source by validating it as a file path.
     *
     * @param {string} source - Location of the data source.
     * @param {*} [context] - Accepted but not consulted.
     * @returns {object} Whatever `InputValidator#validateFilePath` returns for `source`.
     */
    validateDataSource(source, context) {
        const outcome = this.validator.validateFilePath(source);
        return outcome;
    }
    /**
     * Remove the characters < > : " | ? * and all ASCII control characters
     * (0x00-0x1f) from a string, then trim surrounding whitespace.
     *
     * @param {string} input - Text to clean.
     * @returns {string} The cleaned text.
     */
    sanitizeInput(input) {
        const forbiddenChars = /[<>:"|?*\x00-\x1f]/g;
        const stripped = input.replace(forbiddenChars, '');
        return stripped.trim();
    }
}
// Publish the ExternalDataValidator class on the module's exports object.
exports.ExternalDataValidator = ExternalDataValidator;
//# sourceMappingURL=input-validator.js.map