// file-validation-engine
// Version: (not specified)
// A dynamic rule-based validation engine for Excel and CSV files.
// Listing metadata: 144 lines (130 loc) • 5.35 kB, text/typescript
import fs from 'fs';
import path from 'path';
import { parse as csvParse } from 'csv-parse';
import * as XLSX from 'xlsx';
/**
 * One validation rule, applied to a single column of every row.
 */
export type Rule = {
  /** Key of the row object whose value this rule inspects. */
  column: string;

  /**
   * Decides whether the column value is acceptable.
   * Receives the column value and the complete row; return `true` when valid.
   */
  validate: (value: any, row: any) => boolean;

  /** Text inserted into the error line when `validate` returns `false`. */
  errorMessage: string;

  /**
   * Optional guard. When present and it returns `false` for a row,
   * the rule is skipped entirely for that row.
   */
  condition?: (row: any) => boolean;

  /**
   * When `true`, a repeated value in `column` is reported as a duplicate.
   * Values that are `undefined`, `null` or `''` are not tracked.
   */
  unique?: boolean;
};
/**
 * Tuning options accepted by the `RuleEngine` constructor.
 */
export interface RuleEngineOptions {
  /**
   * Number of rows validated per batch.
   * The engine uses 10000 when this option is omitted.
   */
  chunkSize?: number;
}
/**
 * Validates the rows of a CSV or Excel file against a list of rules and
 * appends one line per violation to an error file.
 *
 * Row numbers in error lines count the header as row 1, so the first data
 * row is reported as "Row 2".
 */
export class RuleEngine {
  /** Batch size used when `chunkSize` is missing or not a finite number. */
  private static readonly DEFAULT_CHUNK_SIZE = 10000;

  private rules: Rule[];
  private options: RuleEngineOptions & { chunkSize: number };

  /**
   * Values already seen for each `unique` rule, keyed by the rule's index in
   * `rules`. Keying by index (rather than by column name) keeps two unique
   * rules that target the same column from flagging each other's first
   * sighting of a value as a duplicate.
   */
  private seenValues: Map<number, Set<unknown>> = new Map();

  /**
   * @param rules   Rules applied, in order, to every row.
   * @param options Optional tuning; `chunkSize` is normalized to a positive
   *                integer and defaults to 10000.
   */
  constructor(rules: Rule[], options: RuleEngineOptions = {}) {
    this.rules = rules;
    // Spread first so an explicit `chunkSize: undefined` cannot erase the default.
    this.options = {
      ...options,
      chunkSize: RuleEngine.normalizeChunkSize(options.chunkSize),
    };
    this.initUniqueTracking();
  }

  /**
   * Turns the requested chunk size into a usable batch size.
   * Missing or non-finite values fall back to the default; anything else is
   * floored and clamped to at least 1 so batch loops always make progress.
   */
  private static normalizeChunkSize(requested: unknown): number {
    if (typeof requested !== 'number' || !Number.isFinite(requested)) {
      return RuleEngine.DEFAULT_CHUNK_SIZE;
    }
    return Math.max(1, Math.floor(requested));
  }

  /** Replaces `value` for display: strings get CR/LF/TAB runs collapsed to a space and are cut to 200 chars. */
  private static formatValue(value: unknown): unknown {
    return typeof value === 'string'
      ? value.replace(/[\r\n\t]+/g, ' ').slice(0, 200)
      : value;
  }

  /** Starts a fresh duplicate-tracking set for every rule marked `unique`. */
  private initUniqueTracking(): void {
    this.seenValues = new Map();
    for (let ruleIdx = 0; ruleIdx < this.rules.length; ruleIdx++) {
      if (this.rules[ruleIdx].unique) {
        this.seenValues.set(ruleIdx, new Set());
      }
    }
  }

  /**
   * Validates a `.csv`, `.xlsx` or `.xls` file (chosen by file extension).
   * Violations are appended to `outputErrorFile`; existing content of that
   * file is kept.
   *
   * @param filePath        Path of the file to validate.
   * @param outputErrorFile Path of the file that receives the error lines.
   * @returns Promise resolving to `true` when at least one violation was
   *          written, `false` when the file is clean.
   * @throws Error('Unsupported file type') (as a rejection) for any other extension.
   */
  async validateFile(
    filePath: string,
    outputErrorFile: string
  ): Promise<boolean> {
    // Uniqueness is scoped to a single file, so tracking restarts on every call.
    this.initUniqueTracking();

    const ext = path.extname(filePath).toLowerCase();
    switch (ext) {
      case '.csv':
        return this.validateCSV(filePath, outputErrorFile);
      case '.xlsx':
      case '.xls':
        return this.validateExcel(filePath, outputErrorFile);
      default:
        throw new Error('Unsupported file type');
    }
  }

  /**
   * Streams a CSV file through the parser and validates it batch by batch.
   * Rejects when the file cannot be read, the CSV cannot be parsed, or a
   * rule callback / error-file write throws.
   */
  private validateCSV(filePath: string, outputErrorFile: string): Promise<boolean> {
    return new Promise<boolean>((resolve, reject) => {
      const { chunkSize } = this.options;
      const readStream = fs.createReadStream(filePath);
      const parser = csvParse({ columns: true });

      let hasErrors = false;
      let rowNum = 1;
      let chunk: Record<string, unknown>[] = [];
      let settled = false;

      // Single failure path: stop both streams and reject exactly once.
      const fail = (err: unknown): void => {
        if (settled) return;
        settled = true;
        readStream.destroy();
        parser.destroy();
        reject(err);
      };

      // Validates and clears the pending batch, advancing the row counter.
      const flush = (): void => {
        if (chunk.length === 0) return;
        if (this.reportChunk(chunk, rowNum, outputErrorFile)) {
          hasErrors = true;
        }
        rowNum += chunk.length;
        chunk = [];
      };

      parser.on('readable', () => {
        if (settled) return;
        try {
          let record: Record<string, unknown> | null;
          while ((record = parser.read()) !== null) {
            chunk.push(record);
            if (chunk.length >= chunkSize) flush();
          }
        } catch (err) {
          // Exceptions inside an event handler would otherwise escape the promise.
          fail(err);
        }
      });

      parser.on('end', () => {
        if (settled) return;
        try {
          flush();
        } catch (err) {
          fail(err);
          return;
        }
        settled = true;
        resolve(hasErrors);
      });

      parser.on('error', fail);
      // pipe() does not forward source errors, so the read stream needs its own listener.
      readStream.on('error', fail);
      readStream.pipe(parser);
    });
  }

  /**
   * Loads the first sheet of an Excel workbook and validates it batch by batch.
   * Empty cells are read as `''`.
   */
  private async validateExcel(filePath: string, outputErrorFile: string): Promise<boolean> {
    const workbook = XLSX.readFile(filePath);
    const sheetName = workbook.SheetNames[0];
    const sheet = workbook.Sheets[sheetName];
    const rows: Record<string, unknown>[] = XLSX.utils.sheet_to_json(sheet, { defval: '' });

    const { chunkSize } = this.options;
    let hasErrors = false;
    for (let start = 0; start < rows.length; start += chunkSize) {
      const chunk = rows.slice(start, start + chunkSize);
      // `start + 1` is the running row counter: it begins at 1 and grows by each batch's length.
      if (this.reportChunk(chunk, start + 1, outputErrorFile)) {
        hasErrors = true;
      }
    }
    return hasErrors;
  }

  /**
   * Validates one batch and appends its error lines to `outputErrorFile`.
   * @returns `true` when the batch produced at least one error line.
   */
  private reportChunk(
    chunk: Record<string, unknown>[],
    startRow: number,
    outputErrorFile: string
  ): boolean {
    const errors = this.validateChunk(chunk, startRow);
    if (errors.length === 0) return false;
    fs.appendFileSync(outputErrorFile, errors.join('\n') + '\n');
    return true;
  }

  /**
   * Applies every rule to every row of a batch.
   *
   * @param chunk    Rows to validate.
   * @param startRow Row counter for the batch; row `i` of the batch is
   *                 reported as `startRow + i + 1`.
   * @returns One error line per violation, in row order then rule order.
   */
  private validateChunk(chunk: Record<string, unknown>[], startRow: number): string[] {
    const errors: string[] = [];

    for (let offset = 0; offset < chunk.length; offset++) {
      const row = chunk[offset];
      const rowLabel = `Row ${startRow + offset + 1}`;

      for (let ruleIdx = 0; ruleIdx < this.rules.length; ruleIdx++) {
        const rule = this.rules[ruleIdx];
        if (rule.condition && !rule.condition(row)) continue;

        const value = row[rule.column];

        // Empty values are never tracked, so blanks do not count as duplicates.
        if (rule.unique && value !== undefined && value !== null && value !== '') {
          let seen = this.seenValues.get(ruleIdx);
          if (!seen) {
            // Covers rules added to the array after tracking was initialised.
            seen = new Set();
            this.seenValues.set(ruleIdx, seen);
          }
          if (seen.has(value)) {
            errors.push(
              `${rowLabel}: [${rule.column}] must be unique (duplicate value: ${RuleEngine.formatValue(value)})`
            );
            // A duplicate is reported on its own; the rule's validate() is skipped.
            continue;
          }
          seen.add(value);
        }

        if (!rule.validate(value, row)) {
          errors.push(
            `${rowLabel}: [${rule.column}] ${rule.errorMessage} (value: ${RuleEngine.formatValue(value)})`
          );
        }
      }
    }

    return errors;
  }
}