UNPKG

csv-do

Version:

Command line utilities for CSV based on NodeJS. Split, Group, Aggregate, Mix, Merge csv files.

435 lines (364 loc) 15 kB
const ObjectsToCsv = require('objects-to-csv'); (() => { const fs = require('fs'); const csv = require('csvtojson'); const objectsToCsv = require('objects-to-csv'); function doAction(_args) { let action = _args._[0]; console.log("ACTION", action); switch (action) { case "split": case "sp": console.log("SPLIT REQUESTED!"); split(_args); break; case "compare": case "co": console.log("COMPARE REQUESTED"); compare(_args); break; case "join": case "jo": console.log("JOIN REQUESTED!"); join(); break; case "aggregate": case "ag": console.log("AGGREGATE REQUESTED!"); aggregate(); break; case "find-duplicates": case "fd": console.log("FIND DUPLICATES REQUESTED!"); findDuplicates(); break; case "check": case "ck": checkCsv(_args); break; default: console.error(`ERROR: Invalid Action ${action}`); } } function compare(_args) { const file1 = _args.f1; const file2 = _args.f2; const columns = _args.c; const searchColumns = _args.sc; const outputFile = _args.ofl; const valParams = compare_validateParams(file1, file2, columns, outputFile); if (valParams) { console.log("Parameters are valid".green); console.log("Will compare files: ", file1, file2); console.log("Columns Spec:", columns); console.log("Search Columns Spec:", searchColumns); console.log("Output File:", outputFile); console.log("Reading File1...", file1); csv() .fromFile(file1) .then((csv1) => { console.log("File 1 read!", csv1.length); console.log("Reading File2...", file2); csv() .fromFile(file2) .then((csv2) => { console.log("File 2 read!", csv2.length); if (!searchColumns) { const result = compare_wo_cols(csv1, csv2, outputFile); console.log("result wo cols"); } else { const result = compare_by_cols(csv1, csv2, columns, outputFile); console.log("result by cols", result); } }) .error((e) => { console.error("ERROR!", e); }); }) .error((e) => { console.error("ERROR!", e); }); } else { console.error("Parameters are invalid!".red); } } /** * Compares two csv contents row by row and outputs the results of the comparison. * @param {fs file} csv1 * @param {fs file} csv2 * @param {output file path} outputFile */ function compare_wo_cols(csv1, csv2, outputFile) { let findings = []; let rowsWithFindings = 0; let cellsWithFindings = 0; for (rowNum = 0; rowNum < csv1.length; rowNum++) { let row1 = csv1[rowNum]; let row2 = csv2[rowNum]; let rowComparison = compareRows(rowNum, row1, row2); if (rowComparison.length > 0) { rowsWithFindings++; cellsWithFindings += rowComparison.length; findings.push(rowComparison); } } if (findings === undefined || findings.length === 0) { if (csv1.length === csv2.length) { console.log(`No differences found on files. Both have ${csv1.length} rows that match field by field`.green); } else { console.log(`All rows in file1 match rows in file2.`.green); console.log(` Though file2 has more rows! ${csv1.length} rows in file1 vs. ${csv2.length} rows in file2`.green); } } else { console.log(`Issues found in ${cellsWithFindings} cells within ${rowsWithFindings} rows!`.red); } return findings; } function compareRows(rowNum, row1, row2) { let row1Keys = Object.keys(row1); let keyIndex = 0; let issuesFound = []; for (key of row1Keys) { if (rowNum === 0) { console.log(`Analyzing row ${rowNum} for key ${key}`); } if (row1[key] !== row2[key]) { issuesFound.push(`Column "${key}"! V1:"${row1[key]}" should match V2:"${row2[key]}" @ row ${rowNum}`.yellow); } keyIndex++; } if (issuesFound.length === 0) { //console.log(`ROW: ${rowNum} is a full match!`.green); } else { console.log(`ROW: ${rowNum} has differences!`.red); issuesFound.map((issue) => { console.log(` Item: ${issue}`); }); } return issuesFound; } /** * Compares two csv contents finding matching rows by the provided columns. * @param {*} csv1 * @param {*} csv2 * @param {*} columns * @param {*} outputFile */ function compare_by_cols(csv1, csv2, columns, outputFile) { } function compare_validateParams(file1, file2, columns, outputFile) { let errorCount = 0; if (!fs.existsSync(file1)) { console.error("ERROR", "--file1 parameter is invalid!", file1); errorCount++; } if (!fs.existsSync(file2)) { console.error("ERROR", "--file2 parameter is invalid!", file2); errorCount++; } /* if (!outputFile) { console.error("ERROR", "--output-file parameter is invalid!", file2); errorCount++; } */ if (columns) { let colArray = JSON.parse(`[${columns}]`); if (colArray.length == 0) { console.error("ERROR", "--columns parameter is invalid!", file2); errorCount++; } } if (errorCount === 0) { return { file1, file2, columns, outputFile }; } } function split(_args) { const inputFilePath = _args.if; const columns = _args.c; const chunkSize = _args.cs; const outputFolderPath = _args.of; const valParams = split_validateParams(inputFilePath, columns, chunkSize, outputFolderPath); if (valParams) { console.log("Parameters are valid".green); console.log("Will split file: ", inputFilePath); console.log("Columns Spec:", columns); console.log("Chunk Size:", chunkSize); console.log("Output Folder:", outputFolderPath); console.log("Reading the file..."); csv() .fromFile(inputFilePath) .then((inputArray) => { console.log("Reading file Complete!".green); let splitContent = {}; if (chunkSize > 0) { console.log(`SPLITTING BY CHUNKS. ${inputArray.length} lines`); splitContent = split_by_chunks(inputArray, valParams); } else { console.log(`SPLITTING BY COLUMNS. ${inputArray.length} lines`); splitContent = split_by_cols(inputArray, valParams) } console.log("Spliting complete!".green); console.log("Split Keys: ".yellow, Object.keys(splitContent).length); console.log(Object.keys(splitContent)); console.log("Saving files..."); split_saveFiles(splitContent, outputFolderPath); }); } else { console.error("Parameters are invalid!".red); } } function split_by_chunks(inputArray, valParams) { let splitContent = inputArray.reduce((resultArray, item, index) => { const chunkIndex = Math.floor(index / valParams.chunkSize); if (!resultArray[chunkIndex]) { resultArray[chunkIndex] = []; } resultArray[chunkIndex].push(item); return resultArray; }, []); return splitContent; } function split_by_cols(inputArray, valParams) { let splitContent = {}; for (i = 0; i < inputArray.length; i++) { let item = inputArray[i]; let splitKeys = valParams.columns.map((colNumber) => { return Object.keys(item)[colNumber - 1]; }); let splitKey = ""; for (j = 0; j < splitKeys.length; j++) { splitKey += (j === 0) ? item[splitKeys[j]] : "|" + item[splitKeys[j]]; } if (splitContent[splitKey] === undefined) { splitContent[splitKey] = []; } splitContent[splitKey].push(item); } return splitContent; } function split_saveFiles(splitContent, folder) { let totalLines = 0; let totalFiles = 0; for (item in splitContent) { let sanitizedFolder = (folder.slice(-1) === "/" || folder.slice(-1) === "\\") ? folder : `${folder}/`; let destinationFileName = `${folder}${item.replace(/ /g, "_").toLowerCase()}_${splitContent[item].length}.csv` let data = splitContent[item]; (async () => { const tocsv = new ObjectsToCsv(data); await tocsv.toDisk(destinationFileName); })(); console.log("File Saved!".green, destinationFileName); totalFiles++; totalLines += splitContent[item].length; } console.log(`Finished saving ${totalFiles} files!`.green); console.log(`Total Lines: ${totalLines}`); } /** * Validates parameters for the split command. Returns an object with the validated parameters if validation passess. Generates errors if not. * @param {*} inputFilePath * @param {*} columns * @param {*} chunkSize * @param {*} outputFolderPath * @returns */ function split_validateParams(inputFilePath, columns, chunkSize, outputFolderPath) { let errorCount = 0; if (!fs.existsSync(inputFilePath)) { console.error("ERROR", "--input-file parameter is invalid!", inputFilePath); errorCount++; } if ( (!columns && !chunkSize) || (!columns && !chunkSize > 0) ) { console.error("ERROR", "--columns or --chunk-size parameters are invalid!", columns, chunkSize); errorCount++; } if (!chunkSize) { try { let colArray = JSON.parse(`[${columns}]`); console.log("COLARRAY", colArray) if (colArray && colArray.length === 0) { console.error("ERROR", "Invalid column array specification ( --columns )", columns, chunkSize); errorCount++; } else { columns = colArray; } } catch (ex) { console.error("ERROR", "Invalid column array specification ( --columns )", columns, chunkSize); errorCount++; } } if (!fs.existsSync(outputFolderPath)) { console.error("ERROR", "--output-folder parameter is invalid!", outputFolderPath); errorCount++; } if (errorCount === 0) { return { inputFilePath, columns, chunkSize, outputFolderPath }; } } function join() { console.error("OUR APOLOGIES. JOIN IS NOT IMPLEMENTED YET. PLEASE COME BACK SOON!".bgRed); } function aggregate() { console.error("OUR APOLOGIES. AGGREGATE IS NOT IMPLEMENTED YET. PLEASE COME BACK SOON!".bgRed); } function findDuplicates() { console.error("OUR APOLOGIES. FIND DUPLICATES IS NOT IMPLEMENTED YET. PLEASE COME BACK SOON!".bgRed); } function checkCsv(_args) { const csvFile = _args.if; const schemaFile = _args.schema; if (!fs.existsSync(csvFile)) { console.error("ERROR: CSV file does not exist:", csvFile); return; } if (!fs.existsSync(schemaFile)) { console.error("ERROR: Schema file does not exist:", schemaFile); return; } Promise.all([csv().fromFile(csvFile), fs.promises.readFile(schemaFile)]) .then(([csvData, schemaRaw]) => { const schema = JSON.parse(schemaRaw); const expectedHeaders = schema.columns; const dateTimeColumns = schema.datetime_columns || []; // Check headers const actualHeaders = Object.keys(csvData[0]); if (JSON.stringify(expectedHeaders) !== JSON.stringify(actualHeaders)) { console.error("ERROR: CSV headers do not match schema!"); console.error("Expected:", expectedHeaders); console.error("Actual:", actualHeaders); return; } // Check each row let rowErrors = 0; csvData.forEach((row, idx) => { const cols = Object.keys(row); // Check hidden columns if (cols.length !== expectedHeaders.length) { rowErrors++; console.error(`Row ${idx + 1}: Incorrect number of columns (expected ${expectedHeaders.length}, found ${cols.length}).`); } // Check datetime format (YYYY-mm-dd hh:mm:ss.fff) dateTimeColumns.forEach(dtCol => { if (row[dtCol] && !/^(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})\.(\d{3})$/.test(row[dtCol])) { rowErrors++; console.error(`Row ${idx + 1}, Column "${dtCol}": Incorrect datetime format (expected YYYY-mm-dd hh:mm:ss.fff). Found: "${row[dtCol]}"`); } }); }); if (rowErrors === 0) { console.log("CSV Check PASSED! No errors found."); } else { console.error(`CSV Check FAILED! ${rowErrors} error(s) found.`); } }) .catch(err => { console.error("An error occurred during validation:", err); }); } exports.Do = doAction; })();