UNPKG

file-validation-engine

Version:

A dynamic rule-based validation engine for Excel and CSV files

144 lines (130 loc) 5.35 kB
import fs from 'fs';
import path from 'path';
import { parse as csvParse } from 'csv-parse';
import * as XLSX from 'xlsx';

/**
 * A single validation rule applied to one column of every row.
 */
export type Rule = {
  /** Key used to look the value up on each row object. */
  column: string;
  /** Returns `true` when the value is acceptable; receives the cell value and the whole row. */
  validate: (value: any, row: any) => boolean;
  /** Text inserted into the error line when `validate` returns `false`. */
  errorMessage: string;
  /** When provided and it returns `false` for a row, the rule is skipped for that row. */
  condition?: (row: any) => boolean;
  /** When `true`, non-empty values of `column` must not repeat within one file. */
  unique?: boolean;
};

/**
 * Tuning options for {@link RuleEngine}.
 */
export interface RuleEngineOptions {
  /** Number of rows validated per batch. Defaults to 10000. */
  chunkSize?: number;
}

/** Internal shape of a parsed row: column name to cell value. */
type Row = Record<string, unknown>;

/**
 * Validates CSV and Excel files row by row against a list of {@link Rule}s and
 * appends one line per violation to an error file.
 */
export class RuleEngine {
  private static readonly DEFAULT_CHUNK_SIZE = 10000;
  private static readonly MAX_VALUE_LENGTH = 200;

  private readonly rules: Rule[];
  private readonly options: Required<RuleEngineOptions>;
  /** Values already seen per unique column; a Map so that column names cannot collide with object prototype keys. */
  private seenValues = new Map<string, Set<unknown>>();

  /**
   * @param rules Rules evaluated, in order, against every row.
   * @param options Optional tuning; see {@link RuleEngineOptions}.
   */
  constructor(rules: Rule[], options: RuleEngineOptions = {}) {
    this.rules = rules;
    this.options = {
      ...options,
      chunkSize: RuleEngine.normalizeChunkSize(options.chunkSize),
    };
    this.initUniqueTracking();
  }

  /**
   * Validates the file at `filePath`, choosing the parser from the file extension
   * (`.csv`, `.xlsx`, `.xls`). Violations are appended to `outputErrorFile`; the file
   * is never truncated here, so lines from earlier runs are kept.
   *
   * @param filePath Path of the file to validate.
   * @param outputErrorFile Path of the file that error lines are appended to.
   * @returns `true` when at least one violation was written, `false` otherwise.
   * @throws Error `'Unsupported file type'` (as a rejected promise) for any other extension.
   */
  async validateFile(
    filePath: string,
    outputErrorFile: string
  ): Promise<boolean> {
    // Uniqueness is tracked per file, so start every run from a clean slate.
    this.initUniqueTracking();

    const ext = path.extname(filePath).toLowerCase();
    if (ext === '.csv') {
      return this.validateCSV(filePath, outputErrorFile);
    }
    if (ext === '.xlsx' || ext === '.xls') {
      return this.validateExcel(filePath, outputErrorFile);
    }
    throw new Error('Unsupported file type');
  }

  /**
   * Falls back to the default when the size is missing or not a number, and clamps
   * to at least 1 so the Excel batching loop always advances.
   */
  private static normalizeChunkSize(size: number | undefined): number {
    if (typeof size !== 'number' || Number.isNaN(size)) {
      return RuleEngine.DEFAULT_CHUNK_SIZE;
    }
    return Math.max(1, size);
  }

  /** Renders a cell value for an error line: strings are flattened to one line and truncated. */
  private static formatValue(value: unknown): string {
    if (typeof value === 'string') {
      return value.replace(/[\r\n\t]+/g, ' ').slice(0, RuleEngine.MAX_VALUE_LENGTH);
    }
    return String(value);
  }

  /** Creates an empty seen-set for every column that has a `unique` rule. */
  private initUniqueTracking(): void {
    this.seenValues = new Map();
    for (const rule of this.rules) {
      if (rule.unique) this.seenValues.set(rule.column, new Set());
    }
  }

  /** Returns the seen-set for `column`, creating it if a rule was flagged `unique` after the last reset. */
  private seenFor(column: string): Set<unknown> {
    let seen = this.seenValues.get(column);
    if (seen === undefined) {
      seen = new Set();
      this.seenValues.set(column, seen);
    }
    return seen;
  }

  /** Appends the error lines to `outputErrorFile`; returns whether anything was written. */
  private writeErrors(errors: readonly string[], outputErrorFile: string): boolean {
    if (errors.length === 0) return false;
    fs.appendFileSync(outputErrorFile, errors.join('\n') + '\n');
    return true;
  }

  /**
   * Streams a CSV file through csv-parse (first line used as column names) and
   * validates it in batches of `chunkSize` rows.
   *
   * The promise rejects on parser errors, on read-stream errors (for example a
   * missing file) and on any exception thrown while validating or writing errors.
   */
  private validateCSV(filePath: string, outputErrorFile: string): Promise<boolean> {
    return new Promise<boolean>((resolve, reject) => {
      const readStream = fs.createReadStream(filePath);
      const parser = csvParse({ columns: true });
      const chunkSize = this.options.chunkSize;

      let hasErrors = false;
      let rowNum = 1;
      let chunk: Row[] = [];
      let settled = false;

      const fail = (err: unknown): void => {
        if (settled) return;
        settled = true;
        // Stop both ends of the pipe so no further rows are read or validated.
        readStream.destroy();
        parser.destroy();
        reject(err);
      };

      const flush = (): void => {
        if (chunk.length === 0) return;
        const errors = this.validateChunk(chunk, rowNum);
        if (this.writeErrors(errors, outputErrorFile)) hasErrors = true;
        rowNum += chunk.length;
        chunk = [];
      };

      parser.on('readable', () => {
        if (settled) return;
        // Exceptions inside an event handler would otherwise surface as uncaught
        // exceptions and leave the promise pending.
        try {
          let record: Row | null;
          while ((record = parser.read()) != null) {
            chunk.push(record);
            if (chunk.length >= chunkSize) flush();
          }
        } catch (err) {
          fail(err);
        }
      });

      parser.on('end', () => {
        if (settled) return;
        try {
          flush();
        } catch (err) {
          fail(err);
          return;
        }
        settled = true;
        resolve(hasErrors);
      });

      parser.on('error', fail);
      // pipe() does not forward errors from the source stream to the destination.
      readStream.on('error', fail);

      readStream.pipe(parser);
    });
  }

  /**
   * Loads the first worksheet of an Excel workbook into memory and validates it in
   * batches of `chunkSize` rows. Missing cells are read as `''` (`defval`).
   */
  private async validateExcel(filePath: string, outputErrorFile: string): Promise<boolean> {
    const workbook = XLSX.readFile(filePath);
    const sheetName = workbook.SheetNames[0];
    const sheet = sheetName === undefined ? undefined : workbook.Sheets[sheetName];
    if (!sheet) return false; // workbook without a worksheet: nothing to validate

    const rows = XLSX.utils.sheet_to_json<Row>(sheet, { defval: '' });
    const chunkSize = this.options.chunkSize;

    let hasErrors = false;
    let rowNum = 1;
    for (let i = 0; i < rows.length; i += chunkSize) {
      const chunk = rows.slice(i, i + chunkSize);
      const errors = this.validateChunk(chunk, rowNum);
      if (this.writeErrors(errors, outputErrorFile)) hasErrors = true;
      rowNum += chunk.length;
    }
    return hasErrors;
  }

  /**
   * Applies every rule to every row of `chunk` and returns the error lines.
   *
   * For a `unique` rule a duplicate is reported instead of running `validate`.
   * Empty values (`undefined`, `null`, `''`) are exempt from the uniqueness check.
   *
   * @param chunk Rows to validate.
   * @param startRow Counter value for the first row of the chunk. Rows are reported as
   *   `startRow + index + 1`; callers start at 1, so the first record is "Row 2"
   *   (presumably counting the header line as row 1).
   */
  private validateChunk(chunk: readonly Row[], startRow: number): string[] {
    const errors: string[] = [];

    chunk.forEach((row, idx) => {
      const rowLabel = `Row ${startRow + idx + 1}`;
      // Uniqueness verdict per column for this row. Several unique rules on one column
      // share a seen-set, so without this cache the second rule would flag the value
      // that the first rule just recorded for the very same row.
      const duplicateByColumn = new Map<string, boolean>();

      for (const rule of this.rules) {
        if (rule.condition && !rule.condition(row)) continue;

        const value = row[rule.column];

        if (rule.unique && value !== undefined && value !== null && value !== '') {
          let isDuplicate = duplicateByColumn.get(rule.column);
          if (isDuplicate === undefined) {
            const seen = this.seenFor(rule.column);
            isDuplicate = seen.has(value);
            if (!isDuplicate) seen.add(value);
            duplicateByColumn.set(rule.column, isDuplicate);
          }
          if (isDuplicate) {
            errors.push(
              `${rowLabel}: [${rule.column}] must be unique (duplicate value: ${RuleEngine.formatValue(value)})`
            );
            continue;
          }
        }

        if (!rule.validate(value, row)) {
          errors.push(
            `${rowLabel}: [${rule.column}] ${rule.errorMessage} (value: ${RuleEngine.formatValue(value)})`
          );
        }
      }
    });

    return errors;
  }
}