UNPKG

kepler.gl

Version:

kepler.gl is a WebGL-based application for visualizing large-scale location data in the browser.

489 lines (411 loc) 42.7 kB
'use strict'; Object.defineProperty(exports, "__esModule", { value: true }); var _typeof2 = require('babel-runtime/helpers/typeof'); var _typeof3 = _interopRequireDefault(_typeof2); var _extends2 = require('babel-runtime/helpers/extends'); var _extends3 = _interopRequireDefault(_extends2); var _toArray2 = require('babel-runtime/helpers/toArray'); var _toArray3 = _interopRequireDefault(_toArray2); exports.processCsvData = processCsvData; exports.getSampleForTypeAnalyze = getSampleForTypeAnalyze; exports.parseCsvDataByFieldType = parseCsvDataByFieldType; exports.getFieldsFromData = getFieldsFromData; exports.renameDuplicateFields = renameDuplicateFields; exports.analyzerTypeToFieldType = analyzerTypeToFieldType; exports.processRowObject = processRowObject; exports.processGeojson = processGeojson; exports.formatCsv = formatCsv; exports.validateInputData = validateInputData; var _d3Dsv = require('d3-dsv'); var _d3Array = require('d3-array'); var _window = require('global/window'); var _assert = require('assert'); var _assert2 = _interopRequireDefault(_assert); var _typeAnalyzer = require('type-analyzer'); var _geojsonNormalize = require('@mapbox/geojson-normalize'); var _geojsonNormalize2 = _interopRequireDefault(_geojsonNormalize); var _defaultSettings = require('../constants/default-settings'); var _dataUtils = require('../utils/data-utils'); function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } // if any of these value occurs in csv, parse it to null; // Copyright (c) 2018 Uber Technologies, Inc. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
var CSV_NULLS = ['', 'null', 'NULL', 'Null', 'NaN']; function processCsvData(rawData) { // here we assume the csv file that people uploaded will have first row // as name of the column var _csvParseRows = (0, _d3Dsv.csvParseRows)(rawData), _csvParseRows2 = (0, _toArray3.default)(_csvParseRows), headerRow = _csvParseRows2[0], rows = _csvParseRows2.slice(1); if (!rows.length || !headerRow) { // looks like an empty file // resolve null, and catch them later in one place return null; } cleanUpFalsyCsvValue(rows); // No need to run type detection on every data point // here we get a list of none null values to run analyze on var sample = getSampleForTypeAnalyze({ fields: headerRow, allData: rows }); var fields = getFieldsFromData(sample, headerRow); fields.forEach(parseCsvDataByFieldType.bind(null, rows)); return { fields: fields, rows: rows }; } /** * get fields from csv data * * @param {array} fields - an array of fields name * @param {array} allData * @param {array} sampleCount * @returns {array} formatted fields */ function getSampleForTypeAnalyze(_ref) { var fields = _ref.fields, allData = _ref.allData, _ref$sampleCount = _ref.sampleCount, sampleCount = _ref$sampleCount === undefined ? 
50 : _ref$sampleCount; var total = Math.min(sampleCount, allData.length); // const fieldOrder = fields.map(f => f.name); var sample = (0, _d3Array.range)(0, total, 1).map(function (d) { return {}; }); // collect sample data for each field fields.forEach(function (field, fieldIdx) { // data counter var i = 0; // sample counter var j = 0; while (j < total) { if (i >= allData.length) { // if depleted data pool sample[j][field] = null; j++; } else if ((0, _dataUtils.notNullorUndefined)(allData[i][fieldIdx])) { sample[j][field] = allData[i][fieldIdx]; j++; i++; } else { i++; } } }); return sample; } function cleanUpFalsyCsvValue(rows) { for (var i = 0; i < rows.length; i++) { for (var j = 0; j < rows[i].length; j++) { // analyzer will set any fields to 'string' if there are empty values // which will be parsed as '' by d3.csv // here we parse empty data as null // TODO: create warning when deltect `CSV_NULLS` in the data if (!rows[i][j] || CSV_NULLS.includes(rows[i][j])) { rows[i][j] = null; } } } } /** * Process uploaded csv file to parse value by field type * * @param {array} rows * @param {object} field * @param {number} i * @returns {void} */ function parseCsvDataByFieldType(rows, field, i) { var unixFormat = ['x', 'X']; rows.forEach(function (row) { if (row[i] !== null) { switch (field.type) { case _defaultSettings.ALL_FIELD_TYPES.real: row[i] = parseFloat(row[i]); break; // TODO: timestamp can be either '1495827326' or '2016-03-10 11:20' // if it's '1495827326' we pass it to int case _defaultSettings.ALL_FIELD_TYPES.timestamp: row[i] = unixFormat.includes(field.format) ? 
Number(row[i]) : row[i]; break; case _defaultSettings.ALL_FIELD_TYPES.integer: row[i] = parseInt(row[i], 10); break; case _defaultSettings.ALL_FIELD_TYPES.boolean: // 0 and 1 only field can also be boolean row[i] = row[i] === 'true' || row[i] === 'True' || row[i] === '1'; break; default: break; } } }); } /** * get fields from csv data * * @param {array} data * @param {array} fieldOrder * @returns {array} formatted fields */ function getFieldsFromData(data, fieldOrder) { // add a check for epoch timestamp var metadata = _typeAnalyzer.Analyzer.computeColMeta(data, [{ regex: /.*geojson|all_points/g, dataType: 'GEOMETRY' }]); var _renameDuplicateField = renameDuplicateFields(fieldOrder), fieldByIndex = _renameDuplicateField.fieldByIndex; return fieldOrder.reduce(function (orderedArray, field, index) { var name = fieldByIndex[index]; var fieldMeta = metadata.find(function (m) { return m.key === field; }); var _ref2 = fieldMeta || {}, type = _ref2.type, format = _ref2.format; orderedArray[index] = { name: name, format: format, // need this for mapbuilder conversion: filter type detection // category, tableFieldIndex: index + 1, type: analyzerTypeToFieldType(type) }; return orderedArray; }, []); } /** * pass in an array of field names, rename duplicated one * and return a map from old field index to new name * * @param {array} fieldOrder * @returns {Object} new field name by index */ function renameDuplicateFields(fieldOrder) { return fieldOrder.reduce(function (accu, field, i) { var allNames = accu.allNames; var fieldName = field; // add a counter to duplicated names if (allNames.includes(field)) { var counter = 0; while (allNames.includes(field + '-' + counter)) { counter++; } fieldName = field + '-' + counter; } accu.fieldByIndex[i] = fieldName; accu.allNames.push(fieldName); return accu; }, { allNames: [], fieldByIndex: {} }); } /** * Map Analyzer types to local field types * * @param {string} aType * @returns {string} corresponding type in ALL_FIELD_TYPES */ /* 
eslint-disable complexity */ function analyzerTypeToFieldType(aType) { var DATE = _typeAnalyzer.DATA_TYPES.DATE, TIME = _typeAnalyzer.DATA_TYPES.TIME, DATETIME = _typeAnalyzer.DATA_TYPES.DATETIME, NUMBER = _typeAnalyzer.DATA_TYPES.NUMBER, INT = _typeAnalyzer.DATA_TYPES.INT, FLOAT = _typeAnalyzer.DATA_TYPES.FLOAT, BOOLEAN = _typeAnalyzer.DATA_TYPES.BOOLEAN, STRING = _typeAnalyzer.DATA_TYPES.STRING, CITY = _typeAnalyzer.DATA_TYPES.CITY, GEOMETRY = _typeAnalyzer.DATA_TYPES.GEOMETRY, GEOMETRY_FROM_STRING = _typeAnalyzer.DATA_TYPES.GEOMETRY_FROM_STRING, ZIPCODE = _typeAnalyzer.DATA_TYPES.ZIPCODE, PAIR_GEOMETRY_FROM_STRING = _typeAnalyzer.DATA_TYPES.PAIR_GEOMETRY_FROM_STRING; // TODO: un recognized types // CURRENCY PERCENT NONE switch (aType) { case DATE: return _defaultSettings.ALL_FIELD_TYPES.date; case TIME: case DATETIME: return _defaultSettings.ALL_FIELD_TYPES.timestamp; case NUMBER: case FLOAT: return _defaultSettings.ALL_FIELD_TYPES.real; case INT: return _defaultSettings.ALL_FIELD_TYPES.integer; case BOOLEAN: return _defaultSettings.ALL_FIELD_TYPES.boolean; case GEOMETRY: case GEOMETRY_FROM_STRING: case PAIR_GEOMETRY_FROM_STRING: return _defaultSettings.ALL_FIELD_TYPES.geojson; case STRING: case CITY: case ZIPCODE: return _defaultSettings.ALL_FIELD_TYPES.string; default: _window.console.warn('Unsupported analyzer type: ' + aType); return _defaultSettings.ALL_FIELD_TYPES.string; } } /* eslint-enable complexity */ /* * Process rawData where each row is an object */ function processRowObject(rawData) { if (!rawData.length) { return null; } var keys = Object.keys(rawData[0]); var rows = rawData.map(function (d) { return keys.map(function (key) { return d[key]; }); }); var fields = getFieldsFromData(rawData, keys); return { fields: fields, rows: rows }; } function processGeojson(rawData) { var normalizedGeojson = (0, _geojsonNormalize2.default)(rawData); if (!normalizedGeojson || !Array.isArray(normalizedGeojson.features)) { // fail to normalize geojson return null; 
} // getting all feature fields var allData = normalizedGeojson.features.reduce(function (accu, f, i) { if (f.geometry) { accu.push((0, _extends3.default)({ // add feature to _geojson field _geojson: f }, f.properties || {})); } return accu; }, []); // get all the field var fields = allData.reduce(function (prev, curr) { Object.keys(curr).forEach(function (key) { if (!prev.includes(key)) { prev.push(key); } }); return prev; }, []); // make sure each feature has exact same fields allData.forEach(function (d) { fields.forEach(function (f) { if (!(f in d)) { d[f] = null; } }); }); return processRowObject(allData); } /** * On export data to csv * @param data * @param fields */ function formatCsv(data, fields) { var columns = fields.map(function (f) { return f.name; }); var formattedData = [columns]; // parse geojson object as string data.forEach(function (row) { formattedData.push(row.map(function (d, i) { return d && _defaultSettings.GEOJSON_FIELDS.geojson.includes(fields[i].name) ? JSON.stringify(d) : d; })); }); return (0, _d3Dsv.csvFormatRows)(formattedData); } /** * @param data * @returns {{allData: Array, fields: Array}} */ function validateInputData(data) { // TODO: add test /* * expected input data format * { * fields: [], * rows: [] * } */ var proceed = true; if (!data) { (0, _assert2.default)('receiveVisData: data cannot be null'); proceed = false; } else if (!Array.isArray(data.fields)) { (0, _assert2.default)('receiveVisData: expect data.fields to be an array'); proceed = false; } else if (!Array.isArray(data.rows)) { (0, _assert2.default)('receiveVisData: expect data.rows to be an array'); proceed = false; } if (!proceed) { return null; } var fields = data.fields, rows = data.rows; // check if all fields has name, format and type var allValid = fields.every(function (f, i) { if ((typeof f === 'undefined' ? 
'undefined' : (0, _typeof3.default)(f)) !== 'object') { (0, _assert2.default)('fields needs to be an array of object, but find ' + f); return false; } if (!f.name) { (0, _assert2.default)('field.name is required but missing in field ' + JSON.stringify(f)); // assign a name f.name = 'column_' + i; } if (!_defaultSettings.ALL_FIELD_TYPES[f.type]) { (0, _assert2.default)('unknown field type ' + f.type); return false; } return f.type && f.format && f.name; }); if (allValid) { return { rows: rows, fields: fields }; } // if any field has missing type, recalculate it for everyone // because we simply lost faith in humanity var sampleData = getSampleForTypeAnalyze({ fields: fields.map(function (f) { return f.name; }), allData: rows }); var fieldOrder = fields.map(function (f) { return f.name; }); var meta = getFieldsFromData(sampleData, fieldOrder); var updatedFields = fields.map(function (f, i) { return (0, _extends3.default)({}, f, { type: meta[i].type, format: meta[i].format }); }); return { fields: updatedFields, rows: rows }; } exports.default = { processGeojson: processGeojson, processCsvData: processCsvData, processRowObject: processRowObject, analyzerTypeToFieldType: analyzerTypeToFieldType, getFieldsFromData: getFieldsFromData, parseCsvDataByFieldType: parseCsvDataByFieldType }; //# sourceMappingURL=data:application/json;charset=utf-8;base64,