UNPKG

json-2-csv

Version: (not captured in this extract)

A JSON to CSV and CSV to JSON converter that natively supports sub-documents and auto-generates the CSV heading.

395 lines (346 loc) 15.1 kB
'use strict';

let path = require('doc-path'),
    deeks = require('deeks'),
    constants = require('./constants.json'),
    utils = require('./utils');

/**
 * JSON to CSV converter factory.
 * Builds a converter closed over the provided, already-normalized options object.
 * @param options {Object} converter options (delimiters, keys, header/unwind/expand flags, etc.)
 * @returns {Object} { convert, validationFn, validationMessages }
 */
const Json2Csv = function(options) {
    const wrapDelimiterCheckRegex = new RegExp(options.delimiter.wrap, 'g'),
        crlfSearchRegex = /\r?\n|\r/,
        // Array objects are only expanded in place when we are NOT unwinding them into extra rows
        expandingWithoutUnwinding = options.expandArrayObjects && !options.unwindArrays,
        deeksOptions = {
            expandArrayObjects: expandingWithoutUnwinding,
            ignoreEmptyArraysWhenExpanding: expandingWithoutUnwinding
        };

    /** HEADER FIELD FUNCTIONS **/

    /**
     * Returns the list of data field names of all documents in the provided list
     * @param data {Array<Object>} Data to be converted
     * @returns {Promise.<Array[String]>}
     */
    function getFieldNameList(data) {
        // If keys weren't specified, then we'll use the list of keys generated by the deeks module
        return Promise.resolve(deeks.deepKeysFromList(data, deeksOptions));
    }

    /**
     * Processes the schemas by checking for schema differences, if so desired.
     * If schema differences are not to be checked, then it resolves the unique
     * list of field names.
     * @param documentSchemas {Array<Array<String>>} per-document key lists
     * @returns {Promise.<Array[String]>}
     */
    function processSchemas(documentSchemas) {
        // If the user wants to check for the same schema (regardless of schema ordering)
        if (options.checkSchemaDifferences) {
            return checkSchemaDifferences(documentSchemas);
        } else {
            // Otherwise, we do not care if the schemas are different, so we should get the unique list of keys
            let uniqueFieldNames = utils.unique(utils.flatten(documentSchemas));
            return Promise.resolve(uniqueFieldNames);
        }
    }

    /**
     * This function performs the schema difference check, if the user specifies that it should be checked.
     * If there are no field names, then there are no differences.
     * Otherwise, we get the first schema and the remaining list of schemas
     * @param documentSchemas {Array<Array<String>>}
     * @returns {Promise} resolves the first schema, or rejects on any schema mismatch
     */
    function checkSchemaDifferences(documentSchemas) {
        // have multiple documents - ensure only one schema (regardless of field ordering)
        let firstDocSchema = documentSchemas[0],
            restOfDocumentSchemas = documentSchemas.slice(1),
            schemaDifferences = computeNumberOfSchemaDifferences(firstDocSchema, restOfDocumentSchemas);

        // If there are schema inconsistencies, throw a schema not the same error
        if (schemaDifferences) {
            return Promise.reject(new Error(constants.errors.json2csv.notSameSchema));
        }

        return Promise.resolve(firstDocSchema);
    }

    /**
     * Computes the number of documents whose schema differs from the first document's schema
     * @param firstDocSchema {Array<String>}
     * @param restOfDocumentSchemas {Array<Array<String>>}
     * @returns {Number} count of mismatching documents
     */
    function computeNumberOfSchemaDifferences(firstDocSchema, restOfDocumentSchemas) {
        return restOfDocumentSchemas.reduce((schemaDifferences, documentSchema) => {
            // If there is a difference between the schemas, increment the counter of schema inconsistencies
            let numberOfDifferences = utils.computeSchemaDifferences(firstDocSchema, documentSchema).length;
            return numberOfDifferences > 0
                ? schemaDifferences + 1
                : schemaDifferences;
        }, 0);
    }

    /**
     * If so specified, this sorts the header field names alphabetically
     * @param fieldNames {Array<String>}
     * @returns {Array<String>} sorted field names, or unsorted if sorting not specified
     */
    function sortHeaderFields(fieldNames) {
        if (options.sortHeader) {
            return fieldNames.sort();
        }
        return fieldNames;
    }

    /**
     * Trims the header fields, if the user desires them to be trimmed.
     * Each dot-delimited path component is trimmed individually.
     * @param params {Object} carries headerFields, records, callback
     * @returns {Object} params with headerFields possibly trimmed
     */
    function trimHeaderFields(params) {
        if (options.trimHeaderFields) {
            params.headerFields = params.headerFields.map((field) => field.split('.')
                .map((component) => component.trim())
                .join('.')
            );
        }
        return params;
    }

    /**
     * Wrap the headings, if desired by the user.
     * @param params {Object}
     * @returns {Object} params with headerFields wrapped/escaped when a header will be emitted
     */
    function wrapHeaderFields(params) {
        // only perform this if we are actually prepending the header
        if (options.prependHeader) {
            params.headerFields = params.headerFields.map(function(headingKey) {
                return wrapFieldValueIfNecessary(headingKey);
            });
        }
        return params;
    }

    /**
     * Generates the CSV header string by joining the headerFields by the field delimiter
     * @param params {Object}
     * @returns {Object} params with the generated header attached
     */
    function generateCsvHeader(params) {
        params.header = params.headerFields.join(options.delimiter.field);
        return params;
    }

    /**
     * Retrieve the headings for all documents and return it.
     * This checks that all documents have the same schema.
     * @param data {Array<Object>}
     * @returns {Promise} resolves the list of header field names
     */
    function retrieveHeaderFields(data) {
        // User-provided keys short-circuit schema detection, unless arrays are being unwound
        // (unwinding needs the detected keys to know where to unwind)
        if (options.keys && !options.unwindArrays) {
            return Promise.resolve(options.keys)
                .then(sortHeaderFields);
        }

        return getFieldNameList(data)
            .then(processSchemas)
            .then(sortHeaderFields);
    }

    /** RECORD FIELD FUNCTIONS **/

    /**
     * Unwinds objects in arrays within record objects if the user specifies the
     * unwindArrays option. If not specified, this passes the params
     * argument through to the next function in the promise chain.
     * Recurses until unwinding produces no additional records.
     * @param params {Object}
     * @returns {Promise|Object} params (possibly via a Promise) with records unwound
     */
    function unwindRecordsIfNecessary(params) {
        if (options.unwindArrays) {
            const originalRecordsLength = params.records.length;

            // Unwind each of the documents at the given headerField
            params.headerFields.forEach((headerField) => {
                params.records = utils.unwind(params.records, headerField);
            });

            return retrieveHeaderFields(params.records)
                .then((headerFields) => {
                    params.headerFields = headerFields;

                    // If we were able to unwind more arrays, then try unwinding again...
                    if (originalRecordsLength !== params.records.length) {
                        return unwindRecordsIfNecessary(params);
                    }
                    // Otherwise, we didn't unwind any additional arrays, so continue...

                    // If keys were provided, set the headerFields to the provided keys:
                    if (options.keys) {
                        params.headerFields = options.keys;
                    }
                    return params;
                });
        }
        return params;
    }

    /**
     * Main function which handles the processing of a record, or document to be converted to CSV format
     * This function specifies and performs the necessary operations in the necessary order
     * in order to obtain the data and convert it to CSV form while maintaining RFC 4180 compliance.
     *
     * Order of operations:
     * - Get fields from provided key list (as array of actual values)
     * - Convert the values to csv/string representation [possible option here for custom converters?]
     * - Trim fields
     * - Determine if they need to be wrapped (& wrap if necessary)
     * - Combine values for each line (by joining by field delimiter)
     * @param params {Object}
     * @returns {Object} params with records replaced by the joined CSV body string
     */
    function processRecords(params) {
        params.records = params.records.map((record) => {
            // Retrieve data for each of the headerFields from this record
            let recordFieldData = retrieveRecordFieldData(record, params.headerFields),

                // Process each field's data into its final, escaped CSV cell value
                processedRecordData = recordFieldData.map((fieldValue) => {
                    fieldValue = trimRecordFieldValue(fieldValue);
                    fieldValue = recordFieldValueToString(fieldValue);
                    fieldValue = wrapFieldValueIfNecessary(fieldValue);

                    return fieldValue;
                });

            // Join the record data by the field delimiter
            return generateCsvRowFromRecord(processedRecordData);
        }).join(options.delimiter.eol);

        return params;
    }

    /**
     * Helper function intended to process *just* array values when the expandArrayObjects setting is set to true
     * @param recordFieldValue {Array}
     * @returns {*} processed array value
     */
    function processRecordFieldDataForExpandedArrayObject(recordFieldValue) {
        let filteredRecordFieldValue = utils.removeEmptyFields(recordFieldValue);

        // If we have an array and it's either empty or full of empty values, then use an empty value representation.
        // NOTE: guard on isUndefined (rather than truthiness) so falsy replacement values
        // such as 0 or false are honored, consistent with retrieveRecordFieldData.
        if (!recordFieldValue.length || !filteredRecordFieldValue.length) {
            return !utils.isUndefined(options.emptyFieldValue) ? options.emptyFieldValue : '';
        } else if (filteredRecordFieldValue.length === 1) {
            // Otherwise, we have an array of actual values...
            // Since we are expanding array objects, we will want to key in on values of objects.
            return filteredRecordFieldValue[0]; // Extract the single value in the array
        }
        return recordFieldValue;
    }

    /**
     * Gets all field values from a particular record for the given list of fields
     * @param record {Object}
     * @param fields {Array<String>} dot-notation key paths
     * @returns {Array} field values, in the same order as the given fields
     */
    function retrieveRecordFieldData(record, fields) {
        let recordValues = [];

        fields.forEach((field) => {
            let recordFieldValue = path.evaluatePath(record, field);

            if (!utils.isUndefined(options.emptyFieldValue) && utils.isEmptyField(recordFieldValue)) {
                recordFieldValue = options.emptyFieldValue;
            } else if (options.expandArrayObjects && Array.isArray(recordFieldValue)) {
                recordFieldValue = processRecordFieldDataForExpandedArrayObject(recordFieldValue);
            }

            recordValues.push(recordFieldValue);
        });

        return recordValues;
    }

    /**
     * Converts a record field value to its string representation
     * @param fieldValue {*}
     * @returns {String}
     */
    function recordFieldValueToString(fieldValue) {
        // Arrays and plain objects (but not Dates) are serialized as JSON
        if (Array.isArray(fieldValue) || utils.isObject(fieldValue) && !utils.isDate(fieldValue)) {
            return JSON.stringify(fieldValue);
        } else if (utils.isUndefined(fieldValue)) {
            return 'undefined';
        } else if (utils.isNull(fieldValue)) {
            return 'null';
        } else {
            return !options.useLocaleFormat ? fieldValue.toString() : fieldValue.toLocaleString();
        }
    }

    /**
     * Trims the record field value, if specified by the user's provided options
     * @param fieldValue {*}
     * @returns {*} trimmed value (recursively for arrays of strings)
     */
    function trimRecordFieldValue(fieldValue) {
        if (options.trimFieldValues) {
            if (Array.isArray(fieldValue)) {
                return fieldValue.map(trimRecordFieldValue);
            } else if (utils.isString(fieldValue)) {
                return fieldValue.trim();
            }
            return fieldValue;
        }
        return fieldValue;
    }

    /**
     * Escapes quotation marks in the field value, if necessary, and appropriately
     * wraps the record field value if it contains a comma (field delimiter),
     * quotation mark (wrap delimiter), or a line break (CRLF)
     * @param fieldValue {String}
     * @returns {String}
     */
    function wrapFieldValueIfNecessary(fieldValue) {
        const wrapDelimiter = options.delimiter.wrap;

        // eg. includes quotation marks (default delimiter)
        if (fieldValue.includes(options.delimiter.wrap)) {
            // add an additional quotation mark before each quotation mark appearing in the field value
            fieldValue = fieldValue.replace(wrapDelimiterCheckRegex, wrapDelimiter + wrapDelimiter);
        }
        // if the field contains a comma (field delimiter), quotation mark (wrap delimiter), line break, or CRLF
        // then enclose it in quotation marks (wrap delimiter)
        if (fieldValue.includes(options.delimiter.field) || fieldValue.includes(options.delimiter.wrap) ||
            fieldValue.match(crlfSearchRegex)) {
            // wrap the field's value in a wrap delimiter (quotation marks by default)
            fieldValue = wrapDelimiter + fieldValue + wrapDelimiter;
        }
        return fieldValue;
    }

    /**
     * Generates the CSV record string by joining the field values together by the field delimiter
     * @param recordFieldValues {Array<String>}
     * @returns {String}
     */
    function generateCsvRowFromRecord(recordFieldValues) {
        return recordFieldValues.join(options.delimiter.field);
    }

    /** CSV COMPONENT COMBINER/FINAL PROCESSOR **/

    /**
     * Performs the final CSV construction by combining the fields in the appropriate
     * order depending on the provided options values and sends the generated CSV
     * back to the user
     * @param params {Object}
     * @returns {*} result of invoking the user-supplied callback with (null, csv)
     */
    function generateCsvFromComponents(params) {
        let header = params.header,
            records = params.records,

            // If we are prepending the header, then add an EOL, otherwise just return the records
            csv = (options.excelBOM ? constants.values.excelBOM : '') +
                (options.prependHeader ? header + options.delimiter.eol : '') +
                records;

        return params.callback(null, csv);
    }

    /** MAIN CONVERTER FUNCTION **/

    /**
     * Internally exported json2csv function
     * Takes data as either a document or array of documents and a callback that will be used to report the results
     * @param data {Object|Array<Object>} documents to be converted to csv
     * @param callback {Function} callback function
     */
    function convert(data, callback) {
        // Single document, not an array
        if (utils.isObject(data) && !data.length) {
            data = [data]; // Convert to an array of the given document
        }

        // Retrieve the heading and then generate the CSV with the keys that are identified
        retrieveHeaderFields(data)
            .then((headerFields) => ({
                headerFields,
                callback,
                records: data
            }))
            .then(unwindRecordsIfNecessary)
            .then(processRecords)
            .then(wrapHeaderFields)
            .then(trimHeaderFields)
            .then(generateCsvHeader)
            .then(generateCsvFromComponents)
            .catch(callback);
    }

    return {
        convert,
        validationFn: utils.isObject,
        validationMessages: constants.errors.json2csv
    };
};

module.exports = { Json2Csv };