// neurex
// Version:
// A trainable neural network in NodeJS. Designed for ease of implementation and ANN modelling
// 389 lines (347 loc) • 14.3 kB
// JavaScript
/**
*
* CsvDataHandler
*
*/
const fs = require('fs');
const path = require('path');
const {MinMaxScaler} = require('./normalizer');
/**
* CsvDataHandler is a utility tool that allows you to extract and manipulate data from your .csv dataset.
*
* @class
*/
class CsvDataHandler {
    /**
     * Creates an instance of CsvDataHandler with no file loaded.
     */
    constructor() {
        /**
         * The expected file extension for CSV files.
         * @private
         * @type {string}
         */
        this._FILE_EXTENSION = '.csv';
        /**
         * The name of the loaded file, as passed to `read_csv`.
         * @type {string}
         */
        this.fileName = '';
        /**
         * Array of column names extracted from the first line of the CSV file.
         * @type {string[]}
         */
        this.columnNames = [];
        /**
         * The raw data extracted from the CSV file, as an array of arrays.
         * @type {Array<Array<string>>}
         */
        this.data = [];
    }

    /**
     * Returns the directory that file names are joined onto: the directory of
     * the entry script when `require.main` is available, otherwise the current
     * working directory (e.g. in a REPL, where `require.main` is undefined).
     *
     * @private
     * @returns {string} The base directory for reading and writing files.
     */
    _baseDir() {
        if (typeof require !== 'undefined' && require.main && require.main.filename) {
            return path.dirname(require.main.filename);
        }
        return process.cwd();
    }

    /**
     * Opens and reads the provided CSV file and maps its contents into an array of arrays.
     * The first non-blank line is treated as column names and stored in `columnNames`;
     * the remaining non-blank lines are stored in `data` and returned.
     *
     * Lines are split on every comma and each cell is trimmed, so quoted fields
     * containing commas are NOT kept together.
     *
     * @method read_csv
     * @param {string} filename - The path to the CSV file, joined onto the entry script's directory.
     * @returns {Array<Array<string>>} An array of arrays representing the CSV data, with column names removed from the data array.
     * @throws {Error} If no file is provided, the file has an unsupported extension, or the file cannot be read.
     * @example
     * const loader = new CsvDataHandler();
     * try {
     *     const data = loader.read_csv('my_data.csv');
     *     console.log(data); // [[value1, value2], [value3, value4]]
     *     console.log(loader.columnNames); // ['header1', 'header2']
     * } catch (error) {
     *     console.error(error.message);
     * }
     */
    read_csv(filename) {
        if (!filename) {
            throw new Error("[ERROR]------ No file provided.");
        }
        const dir = this._baseDir();
        const extension_name = path.extname(filename);
        // Compare case-insensitively so that 'DATA.CSV' is accepted as well.
        if (extension_name.toLowerCase() !== this._FILE_EXTENSION) {
            throw new Error(`[ERROR]------- Unsupported file extension '${extension_name}'. Only accepts '${this._FILE_EXTENSION}' format.`);
        }
        this.fileName = filename;
        const csvPath = path.join(dir, filename);

        let fileContent;
        try {
            fileContent = fs.readFileSync(csvPath, 'utf-8');
        } catch (err) {
            // Give a dedicated message for the most common failure, wrap the rest.
            if (err.code === 'ENOENT') {
                throw new Error(`[ERROR]------- File not found: ${csvPath}`);
            }
            throw new Error(`[ERROR]------- Failed to read CSV file: ${err.message}`);
        }

        // Blank lines are dropped; a trailing '\r' from CRLF files is removed by trim().
        const lines = fileContent.split('\n').filter(line => line.trim());
        if (lines.length === 0) {
            this.columnNames = [];
            this.data = [];
            return [];
        }
        const splitLine = line => line.split(',').map(cell => cell.trim());
        // The first line holds the column names, every following line is a data row.
        this.columnNames = splitLine(lines[0]);
        this.data = lines.slice(1).map(splitLine);
        return this.data;
    }

    /**
     * Converts all elements in every row of the provided data array with `Number()`.
     * Non-numeric text becomes `NaN`; note that `Number('')` is `0`, and decimal
     * values are kept as decimals (no integer truncation happens).
     *
     * @method rowsToInt
     * @param {Array<Array<string>>} data - The extracted data from the CSV, where elements are strings.
     * @returns {Array<Array<number>>} A new array of arrays with all elements converted to numbers.
     * @throws {Error} If no data is provided.
     * @example
     * const loader = new CsvDataHandler();
     * const stringData = [['1', '2'], ['3', '4']];
     * const numberData = loader.rowsToInt(stringData);
     * console.log(numberData); // [[1, 2], [3, 4]]
     */
    rowsToInt(data) {
        if (!data) {
            throw new Error("[ERROR]------- No data is passed.");
        }
        return data.map(row => row.map(cell => Number(cell)));
    }

    /**
     * Selects the first `setRange` elements from each row of the provided array.
     *
     * @method getRowElements
     * @param {number} setRange - The number of elements to select from the beginning of each row.
     * @param {Array<Array<any>>} array - The data from which to extract elements.
     * @returns {Array<Array<any>>} A new array of arrays containing the selected elements.
     * @throws {Error} If `setRange` is not a non-negative number or `array` is not provided.
     * @example
     * const loader = new CsvDataHandler();
     * const data = [[1, 2, 3], [4, 5, 6]];
     * const selected = loader.getRowElements(2, data);
     * console.log(selected); // [[1, 2], [4, 5]]
     */
    getRowElements(setRange, array) {
        const rangeIsValid = typeof setRange === 'number' && !Number.isNaN(setRange) && setRange >= 0;
        if (!rangeIsValid || !array) {
            throw new Error(`[ERROR]------- Invalid setRange: ${setRange} or array: ${array}.`);
        }
        return array.map(row => row.slice(0, setRange));
    }

    /**
     * Removes the specified columns from a copy of the dataset and updates `columnNames`.
     * The rows passed in are not modified. A column name listed more than once
     * is removed only once.
     *
     * @method removeColumns
     * @param {string[]} fields - An array of column names to remove.
     * @param {Array<Array<any>>} data - The dataset from which to remove columns.
     * @returns {Array<Array<any>>} A new dataset with the specified columns removed.
     * @throws {Error} If no fields are provided or data is missing, or if a specified column is not found.
     * @example
     * const loader = new CsvDataHandler();
     * loader.columnNames = ['A', 'B', 'C'];
     * const data = [['a1', 'b1', 'c1'], ['a2', 'b2', 'c2']];
     * const newData = loader.removeColumns(['B'], data);
     * console.log(newData); // [['a1', 'c1'], ['a2', 'c2']]
     * console.log(loader.columnNames); // ['A', 'C']
     */
    removeColumns(fields = [], data) {
        if (!Array.isArray(fields) || fields.length === 0 || !data) {
            throw new Error(`[ERROR]------- Invalid fields: ${fields} or data: ${data}.`);
        }
        // Resolve every name first so that an unknown column aborts before any state changes.
        // A Set collapses duplicate names, which would otherwise remove a neighbouring column too.
        const indicesToRemove = new Set(
            fields.map(field => {
                const index = this.columnNames.indexOf(field);
                if (index === -1) {
                    throw new Error(`[ERROR]------- Column '${field}' not found.`);
                }
                return index;
            })
        );
        const keep = (_, index) => !indicesToRemove.has(index);
        // Filtering builds fresh rows, so the caller's rows stay untouched.
        const trimmedData = data.map(row => row.filter(keep));
        this.columnNames = this.columnNames.filter(keep);
        return trimmedData;
    }

    /**
     * Extracts a column and removes it from the dataset rows and from `columnNames`.
     * Every extracted value is wrapped in its own single-element array, so the
     * result has one row per input row. The rows of `data` are mutated in place.
     *
     * @method extractColumn
     * @param {string} columnName - The name of the column to extract.
     * @param {Array<Array<any>>} data - The dataset rows from which to extract the column.
     * @returns {Array<Array<any>>} One single-element array per row, holding that row's extracted value.
     * @throws {Error} If `columnName` or `data` is missing, or if the specified column is not found.
     * @example
     * const loader = new CsvDataHandler();
     * loader.columnNames = ['A', 'B', 'C'];
     * const data = [['a1', 'b1', 'c1'], ['a2', 'b2', 'c2']];
     * const extracted = loader.extractColumn('B', data);
     * console.log(extracted); // [['b1'], ['b2']]
     * console.log(data); // [['a1', 'c1'], ['a2', 'c2']] (data is mutated)
     * console.log(loader.columnNames); // ['A', 'C']
     */
    extractColumn(columnName, data) {
        if (!columnName || !data) {
            throw new Error(`[ERROR]------- columnName: ${columnName}, data: ${data}.`);
        }
        const index = this.columnNames.indexOf(columnName);
        if (index === -1) {
            throw new Error(`[ERROR]------- Column '${columnName}' not found.`);
        }
        const extractedValues = [];
        for (const row of data) {
            const value = row[index];
            row.splice(index, 1); // Remove the value from the caller's row.
            extractedValues.push([value]);
        }
        // Drop the name only after the rows were processed, so a failure above
        // does not leave `columnNames` out of sync with the data.
        this.columnNames.splice(index, 1);
        return extractedValues;
    }

    /**
     * Normalizes the provided data using the specified method.
     * Available methods (matched case-insensitively):
     * - 'MinMax': fits a `MinMaxScaler` on the data and returns its transform of the same data.
     *
     * @method normalize
     * @param {string} method - The normalization method to use.
     * @param {Array<Array<number>>} data - The data to be normalized.
     * @returns {Array<Array<number>>} The normalized data.
     * @throws {Error} If no method or data is provided, or if the method is unsupported.
     * @example
     * const loader = new CsvDataHandler();
     * const data = [[1, 2], [3, 4]];
     * const normalized = loader.normalize('MinMax', data);
     * console.log(normalized); // normalized data based on MinMax scaling
     */
    normalize(method, data) {
        if (!method || !data || data.length === 0) {
            throw new Error(`[ERROR]------- No method nor data is provided.`);
        }
        if (typeof method !== 'string' || method.toLowerCase() !== 'minmax') {
            throw new Error(`[ERROR]------- Unsupported normalization`);
        }
        const scaler = new MinMaxScaler();
        scaler.fit(data); // Fit first, then transform the same data.
        return scaler.transform(data);
    }

    /**
     * Returns the rows from the first row up to (not including) index `range`
     * and leaves out the rest. The input is not modified. When `range` is
     * omitted, a copy of all rows is returned.
     *
     * @method trimRows
     * @param {number} range - The number of leading rows to keep.
     * @param {Array<Array<any>>} data - The extracted data.
     * @returns {Array<Array<any>>} The trimmed dataset.
     * @throws {Error} If no data is passed.
     * @example
     * const loader = new CsvDataHandler();
     * console.log(loader.trimRows(2, [[1], [2], [3]])); // [[1], [2]]
     */
    trimRows(range, data) {
        if (data === undefined || data === null || typeof data.slice !== 'function') {
            throw new Error("[ERROR]------- No data is passed.");
        }
        return data.slice(0, range);
    }

    /**
     * Prints the provided data to the console. For 2D data, prints the column
     * names, a dashed separator and one aligned line per row. For 1D data,
     * prints each element with its 1-based position.
     *
     * @method tabularize
     * @param {Array<Array<any>>} data - The data to display in a tabular format.
     * @throws {Error} If no data is provided.
     * @example
     * const loader = new CsvDataHandler();
     * loader.columnNames = ['Name', 'Age'];
     * const data = [['Alice', 30], ['Bob', 24]];
     * loader.tabularize(data);
     * // Expected output in console:
     * // Name     Age
     * // ----------------
     * // Alice    30
     * // Bob      24
     */
    tabularize(data) {
        if (!data || data.length === 0) {
            throw new Error(`[ERROR]-------- No data is provided.`);
        }
        if (!Array.isArray(data[0])) {
            data.forEach((row, i) => {
                console.log(i + 1, '.', row);
            });
            return;
        }
        const gutter = 4; // Blank columns appended after the widest cell.
        // Start from the header widths, then widen each column to its longest cell.
        const columnWidths = this.columnNames.map(name => name.length);
        for (const row of data) {
            row.forEach((cell, i) => {
                // `|| 0` covers cells that have no matching column name; without it
                // the width would be NaN and the cell would get no padding at all.
                columnWidths[i] = Math.max(columnWidths[i] || 0, String(cell).length);
            });
        }
        const headerRow = this.columnNames
            .map((name, i) => name.padEnd(columnWidths[i] + gutter))
            .join('');
        console.log(headerRow);
        console.log('-'.repeat(headerRow.length)); // Separator line
        for (const row of data) {
            const rowData = row
                .map((cell, i) => String(cell).padEnd(columnWidths[i] + gutter))
                .join('');
            console.log(rowData);
        }
    }

    /**
     * Exports the current column names and the provided rows to a CSV file
     * written next to the entry script, then logs a success message.
     * String values containing a comma, a double quote or a line break are
     * wrapped in double quotes, with embedded double quotes doubled.
     *
     * @method exportCSV
     * @param {string} file_Name - Name of your CSV file, without the '.csv' extension (it is appended).
     * @param {Array<Array<any>>} data - The rows to write below the header line.
     * @throws {Error} If `file_Name` is missing or `data` is not an array.
     */
    exportCSV(file_Name, data) {
        if (!file_Name || !Array.isArray(data)) {
            throw new Error(`[ERROR]------- Invalid file_Name: ${file_Name} or data: ${data}.`);
        }
        const escape = (value) => {
            if (typeof value === 'string' && /[",\r\n]/.test(value)) {
                return `"${value.replace(/"/g, '""')}"`; // escape double quotes
            }
            return value;
        };
        const toLine = cells => cells.map(escape).join(',');
        // Header names go through the same escaping as the data cells.
        const headers = toLine(this.columnNames);
        const rows = data.map(row => toLine(row)).join('\n');
        const csv = `${headers}\n${rows}`;
        const file = path.join(this._baseDir(), `${file_Name}.csv`);
        fs.writeFileSync(file, csv);
        console.log(`[SUCCESS]------- Exported file exported as ${file_Name}.csv`);
    }
}
module.exports = CsvDataHandler;