UNPKG

json-2-csv

Version:

A JSON to CSV and CSV to JSON converter that natively supports sub-documents and auto-generates the CSV heading.

457 lines (456 loc) 20.5 kB
'use strict'; var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { if (k2 === undefined) k2 = k; var desc = Object.getOwnPropertyDescriptor(m, k); if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { desc = { enumerable: true, get: function() { return m[k]; } }; } Object.defineProperty(o, k2, desc); }) : (function(o, m, k, k2) { if (k2 === undefined) k2 = k; o[k2] = m[k]; })); var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { Object.defineProperty(o, "default", { enumerable: true, value: v }); }) : function(o, v) { o["default"] = v; }); var __importStar = (this && this.__importStar) || function (mod) { if (mod && mod.__esModule) return mod; var result = {}; if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k); __setModuleDefault(result, mod); return result; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.Json2Csv = void 0; const doc_path_1 = require("doc-path"); const deeks_1 = require("deeks"); const constants_1 = require("./constants"); const utils = __importStar(require("./utils")); const Json2Csv = function (options) { const wrapDelimiterCheckRegex = new RegExp(options.delimiter.wrap, 'g'), crlfSearchRegex = /\r?\n|\r/, customValueParser = options.parseValue && typeof options.parseValue === 'function' ? options.parseValue : null, expandingWithoutUnwinding = options.expandArrayObjects && !options.unwindArrays, deeksOptions = { arrayIndexesAsKeys: options.arrayIndexesAsKeys, expandNestedObjects: options.expandNestedObjects, expandArrayObjects: expandingWithoutUnwinding, ignoreEmptyArraysWhenExpanding: expandingWithoutUnwinding, escapeNestedDots: true, }; /** HEADER FIELD FUNCTIONS **/ /** * Returns the list of data field names of all documents in the provided list */ function getFieldNameList(data) { // If keys weren't specified, then we'll use the list of keys generated by the deeks module return (0, deeks_1.deepKeysFromList)(data, deeksOptions); } /** * Processes the schemas by checking for schema differences, if so desired. * If schema differences are not to be checked, then it resolves the unique * list of field names. */ function processSchemas(documentSchemas) { // If there are no document schemas then there is nothing to diff and no unique fields to get if (documentSchemas.length === 0) { return []; } // If the user wants to check for the same schema (regardless of schema ordering) if (options.checkSchemaDifferences) { return checkSchemaDifferences(documentSchemas); } else { // Otherwise, we do not care if the schemas are different, so we should get the unique list of keys const uniqueFieldNames = utils.unique(utils.flatten(documentSchemas)); return uniqueFieldNames; } } /** * This function performs the schema difference check, if the user specifies that it should be checked. * If there are no field names, then there are no differences. * Otherwise, we get the first schema and the remaining list of schemas */ function checkSchemaDifferences(documentSchemas) { // have multiple documents - ensure only one schema (regardless of field ordering) const firstDocSchema = documentSchemas[0], restOfDocumentSchemas = documentSchemas.slice(1), schemaDifferences = computeNumberOfSchemaDifferences(firstDocSchema, restOfDocumentSchemas); // If there are schema inconsistencies, throw a schema not the same error if (schemaDifferences) { throw new Error(constants_1.errors.json2csv.notSameSchema); } return firstDocSchema; } /** * Computes the number of schema differences */ function computeNumberOfSchemaDifferences(firstDocSchema, restOfDocumentSchemas) { return restOfDocumentSchemas.reduce((schemaDifferences, documentSchema) => { // If there is a difference between the schemas, increment the counter of schema inconsistencies const numberOfDifferences = utils.computeSchemaDifferences(firstDocSchema, documentSchema).length; return numberOfDifferences > 0 ? schemaDifferences + 1 : schemaDifferences; }, 0); } /** * If so specified, this filters the detected key paths to exclude any keys that have been specified */ function filterExcludedKeys(keyPaths) { if (options.excludeKeys) { return keyPaths.filter((keyPath) => { for (const excludedKey of options.excludeKeys) { // Only match if the excludedKey appears at the beginning of the string so we don't accidentally match a key farther down in a key path const regex = excludedKey instanceof RegExp ? excludedKey : new RegExp(`^${excludedKey}`); if (excludedKey === keyPath || keyPath.match(regex)) { return false; // Exclude the key } } return true; // Otherwise, include the key }); } return keyPaths; } /** * If so specified, this sorts the header field names alphabetically */ function sortHeaderFields(fieldNames) { if (options.sortHeader && typeof options.sortHeader === 'function') { return fieldNames.sort(options.sortHeader); } else if (options.sortHeader) { return fieldNames.sort(); } return fieldNames; } /** * Trims the header fields, if the user desires them to be trimmed. */ function trimHeaderFields(params) { if (options.trimHeaderFields) { params.headerFields = params.headerFields.map((field) => field.split('.') .map((component) => component.trim()) .join('.')); } return params; } /** * Wrap the headings, if desired by the user. */ function wrapHeaderFields(params) { // only perform this if we are actually prepending the header if (options.prependHeader) { params.headerFields = params.headerFields.map(function (headingKey) { return wrapFieldValueIfNecessary(headingKey); }); } return params; } /** * Generates the CSV header string by joining the headerFields by the field delimiter */ function generateCsvHeader(params) { // #185 - generate a keys list to avoid finding native Map() methods const fieldTitleMapKeys = Object.keys(options.fieldTitleMap); params.header = params.headerFields .map(function (field) { let headerKey = field; // If a custom field title was provided for this field, use that if (fieldTitleMapKeys.includes(field)) { headerKey = options.fieldTitleMap[field]; } else if (!options.escapeHeaderNestedDots) { // Otherwise, if the user doesn't want nested dots in keys to be escaped, then unescape them headerKey = headerKey.replace(/\\\./g, '.'); } return headerKey; }) .join(options.delimiter.field); return params; } function convertKeysToHeaderFields() { if (!options.keys) return []; return options.keys.map((key) => { if (typeof key === 'object' && 'field' in key) { options.fieldTitleMap[key.field] = key.title ?? key.field; return key.field; } return key; }); } function extractWildcardMatchKeys() { if (!options.keys) return []; return options.keys.flatMap(item => { if (typeof item === 'string') { // Exclude plain strings that were passed in options.keys return []; } else if (item?.wildcardMatch) { // Return "field" value for objects with wildcardMatch: true return item.field; } // Exclude other objects return []; }); } /** * Retrieve the headings for all documents and return it. * This checks that all documents have the same schema. */ function retrieveHeaderFields(data) { const wildcardMatchKeys = extractWildcardMatchKeys(); const keyStrings = convertKeysToHeaderFields(); const fieldNames = getFieldNameList(data); const processed = processSchemas(fieldNames); if (options.keys) { options.keys = keyStrings; const matchedKeys = keyStrings.flatMap((userProvidedKey) => { // If this is not a wildcard matched key, then just return and include it in the resulting key list if (!wildcardMatchKeys.includes(userProvidedKey)) { return userProvidedKey; } // Otherwise, identify all detected keys that match with the provided wildcard key: const matches = []; const regex = new RegExp(`^${userProvidedKey}`); for (const detectedKey of processed) { if (userProvidedKey === detectedKey || detectedKey.match(regex)) { matches.push(detectedKey); } } return matches; }); if (!options.unwindArrays) { const filtered = filterExcludedKeys(matchedKeys); return sortHeaderFields(filtered); } } const filtered = filterExcludedKeys(processed); return sortHeaderFields(filtered); } /** RECORD FIELD FUNCTIONS **/ function stillNeedsUnwind(params) { for (const record of params.records) { for (const field of params.headerFields) { const value = (0, doc_path_1.evaluatePath)(record, field); if (Array.isArray(value)) { return true; } } } return false; } /** * Unwinds objects in arrays within record objects if the user specifies the * expandArrayObjects option. If not specified, this passes the params * argument through to the next function in the promise chain. * * The `finalPass` parameter is used to trigger one last pass to ensure no more * arrays need to be expanded */ function unwindRecordsIfNecessary(params, finalPass = false) { if (options.unwindArrays) { // Unwind each document at each header field params.headerFields.forEach((headerField) => { params.records = utils.unwind(params.records, headerField); }); params.headerFields = retrieveHeaderFields(params.records); // Continue unwinding if any nested arrays remain if (stillNeedsUnwind(params)) { return unwindRecordsIfNecessary(params, finalPass); } // Run a final time in case the earlier unwinding exposed additional // arrays to unwind... if (!finalPass) { return unwindRecordsIfNecessary(params, true); } // If keys were provided, set the headerFields back to the provided keys after unwinding: if (options.keys) { const userSelectedFields = convertKeysToHeaderFields(); params.headerFields = filterExcludedKeys(userSelectedFields); } return params; } return params; } /** * Main function which handles the processing of a record, or document to be converted to CSV format * This function specifies and performs the necessary operations in the necessary order * in order to obtain the data and convert it to CSV form while maintaining RFC 4180 compliance. * * Order of operations: * - Get fields from provided key list (as array of actual values) * - Convert the values to csv/string representation [possible option here for custom converters?] * - Trim fields * - Determine if they need to be wrapped (& wrap if necessary) * - Combine values for each line (by joining by field delimiter) */ function processRecords(params) { params.recordString = params.records.map((record) => { // Retrieve data for each of the headerFields from this record const recordFieldData = retrieveRecordFieldData(record, params.headerFields), // Process the data in this record and return the processedRecordData = recordFieldData.map((fieldValue) => { fieldValue = trimRecordFieldValue(fieldValue); fieldValue = preventCsvInjection(fieldValue); let stringified = customValueParser ? customValueParser(fieldValue, recordFieldValueToString) : recordFieldValueToString(fieldValue); stringified = wrapFieldValueIfNecessary(stringified); return stringified; }); // Join the record data by the field delimiter return generateCsvRowFromRecord(processedRecordData); }).join(options.delimiter.eol); return params; } /** * Helper function intended to process *just* array values when the expandArrayObjects setting is set to true */ function processRecordFieldDataForExpandedArrayObject(recordFieldValue) { const filteredRecordFieldValue = utils.removeEmptyFields(recordFieldValue); // If we have an array and it's either empty of full of empty values, then use an empty value representation if (!recordFieldValue.length || !filteredRecordFieldValue.length) { return options.emptyFieldValue || ''; } else if (filteredRecordFieldValue.length === 1) { // Otherwise, we have an array of actual values... // Since we are expanding array objects, we will want to key in on values of objects. return filteredRecordFieldValue[0]; // Extract the single value in the array } return recordFieldValue; } /** * Gets all field values from a particular record for the given list of fields */ function retrieveRecordFieldData(record, fields) { const recordValues = []; fields.forEach((field) => { let recordFieldValue = (0, doc_path_1.evaluatePath)(record, field); if (!utils.isUndefined(options.emptyFieldValue) && utils.isEmptyField(recordFieldValue)) { recordFieldValue = options.emptyFieldValue; } else if (options.expandArrayObjects && Array.isArray(recordFieldValue)) { recordFieldValue = processRecordFieldDataForExpandedArrayObject(recordFieldValue); } recordValues.push(recordFieldValue); }); return recordValues; } /** * Converts a record field value to its string representation */ function recordFieldValueToString(fieldValue) { const isDate = fieldValue instanceof Date; // store to avoid checking twice if (fieldValue === null || Array.isArray(fieldValue) || typeof fieldValue === 'object' && !isDate) { return JSON.stringify(fieldValue); } else if (typeof fieldValue === 'undefined') { return 'undefined'; } else if (isDate && options.useDateIso8601Format) { return fieldValue.toISOString(); } else { return !options.useLocaleFormat ? fieldValue.toString() : fieldValue.toLocaleString(); } } /** * Trims the record field value, if specified by the user's provided options */ function trimRecordFieldValue(fieldValue) { if (options.trimFieldValues) { if (Array.isArray(fieldValue)) { return fieldValue.map(trimRecordFieldValue); } else if (typeof fieldValue === 'string') { return fieldValue.trim(); } return fieldValue; } return fieldValue; } /** * Prevent CSV injection on strings if specified by the user's provided options. * Mitigation will be done by ensuring that the first character doesn't being with: * Equals (=), Plus (+), Minus (-), At (@), Tab (0x09), Carriage return (0x0D). * More info: https://owasp.org/www-community/attacks/CSV_Injection */ function preventCsvInjection(fieldValue) { if (options.preventCsvInjection) { if (Array.isArray(fieldValue)) { return fieldValue.map(preventCsvInjection); } else if (typeof fieldValue === 'string' && !utils.isNumber(fieldValue)) { return fieldValue.replace(/^[=+\-@\t\r]+/g, ''); } return fieldValue; } return fieldValue; } /** * Escapes quotation marks in the field value, if necessary, and appropriately * wraps the record field value if it contains a comma (field delimiter), * quotation mark (wrap delimiter), or a line break (CRLF) */ function wrapFieldValueIfNecessary(fieldValue) { const wrapDelimiter = options.delimiter.wrap; // eg. includes quotation marks (default delimiter) if (fieldValue.includes(options.delimiter.wrap)) { // add an additional quotation mark before each quotation mark appearing in the field value fieldValue = fieldValue.replace(wrapDelimiterCheckRegex, wrapDelimiter + wrapDelimiter); } // if the field contains a comma (field delimiter), quotation mark (wrap delimiter), line break, or CRLF // then enclose it in quotation marks (wrap delimiter) if (fieldValue.includes(options.delimiter.field) || fieldValue.includes(options.delimiter.wrap) || fieldValue.match(crlfSearchRegex) || options.wrapBooleans && (fieldValue === 'true' || fieldValue === 'false')) { // wrap the field's value in a wrap delimiter (quotation marks by default) fieldValue = wrapDelimiter + fieldValue + wrapDelimiter; } return fieldValue; } /** * Generates the CSV record string by joining the field values together by the field delimiter */ function generateCsvRowFromRecord(recordFieldValues) { return recordFieldValues.join(options.delimiter.field); } /** CSV COMPONENT COMBINER/FINAL PROCESSOR **/ /** * Performs the final CSV construction by combining the fields in the appropriate * order depending on the provided options values and sends the generated CSV * back to the user */ function generateCsvFromComponents(params) { const header = params.header, records = params.recordString, // If we are prepending the header, then add an EOL, otherwise just return the records csv = (options.excelBOM ? constants_1.excelBOM : '') + (options.prependHeader ? header + options.delimiter.eol : '') + records; return csv; } /** MAIN CONVERTER FUNCTION **/ /** * Internally exported json2csv function */ function convert(data) { // Single document, not an array if (!Array.isArray(data)) { data = [data]; // Convert to an array of the given document } // Retrieve the heading and then generate the CSV with the keys that are identified const headerFields = { headerFields: retrieveHeaderFields(data), records: data, header: '', recordString: '', }; const unwinded = unwindRecordsIfNecessary(headerFields); const processed = processRecords(unwinded); const wrapped = wrapHeaderFields(processed); const trimmed = trimHeaderFields(wrapped); const generated = generateCsvHeader(trimmed); return generateCsvFromComponents(generated); } return { convert, }; }; exports.Json2Csv = Json2Csv;