kepler.gl
Version:
kepler.gl is a WebGL-based application for visualizing large-scale location data in the browser.
489 lines (411 loc) • 42.7 kB
JavaScript
Object.defineProperty(exports, "__esModule", {
value: true
});
var _typeof2 = require('babel-runtime/helpers/typeof');
var _typeof3 = _interopRequireDefault(_typeof2);
var _extends2 = require('babel-runtime/helpers/extends');
var _extends3 = _interopRequireDefault(_extends2);
var _toArray2 = require('babel-runtime/helpers/toArray');
var _toArray3 = _interopRequireDefault(_toArray2);
exports.processCsvData = processCsvData;
exports.getSampleForTypeAnalyze = getSampleForTypeAnalyze;
exports.parseCsvDataByFieldType = parseCsvDataByFieldType;
exports.getFieldsFromData = getFieldsFromData;
exports.renameDuplicateFields = renameDuplicateFields;
exports.analyzerTypeToFieldType = analyzerTypeToFieldType;
exports.processRowObject = processRowObject;
exports.processGeojson = processGeojson;
exports.formatCsv = formatCsv;
exports.validateInputData = validateInputData;
var _d3Dsv = require('d3-dsv');
var _d3Array = require('d3-array');
var _window = require('global/window');
var _assert = require('assert');
var _assert2 = _interopRequireDefault(_assert);
var _typeAnalyzer = require('type-analyzer');
var _geojsonNormalize = require('@mapbox/geojson-normalize');
var _geojsonNormalize2 = _interopRequireDefault(_geojsonNormalize);
var _defaultSettings = require('../constants/default-settings');
var _dataUtils = require('../utils/data-utils');
// Babel interop helper: ES-module namespaces (flagged with `__esModule`) are
// returned untouched; any other value is wrapped as `{ default: value }`.
function _interopRequireDefault(obj) {
  if (obj && obj.__esModule) {
    return obj;
  }
  return { default: obj };
}
// CSV_NULLS: if any of these values occurs in the csv, it is parsed to null
// Copyright (c) 2018 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
// Raw CSV cell values treated as missing data: cleanUpFalsyCsvValue replaces
// any cell equal to one of these strings with null.
var CSV_NULLS = ['', 'null', 'NULL', 'Null', 'NaN'];
/**
 * Parse raw CSV text into field descriptors plus rows whose cells have been
 * converted according to the detected field types.
 * The first CSV row is taken as the list of column names.
 *
 * @param {string} rawData - csv content handed to d3-dsv `csvParseRows`
 * @returns {{fields: Array, rows: Array}|null} null when there is no header
 * or no data row (treated as an empty file, to be handled by the caller)
 */
function processCsvData(rawData) {
  var parsed = (0, _toArray3.default)((0, _d3Dsv.csvParseRows)(rawData));
  var headerRow = parsed[0];
  var rows = parsed.slice(1);

  // looks like an empty file: resolve null and let callers catch it in one place
  var isEmpty = rows.length === 0 || !headerRow;
  if (isEmpty) {
    return null;
  }

  // normalize empty / null-like cells to null before any type detection
  cleanUpFalsyCsvValue(rows);

  // type detection does not need every data point, only a sample of
  // non-null values per column
  var sample = getSampleForTypeAnalyze({ fields: headerRow, allData: rows });
  var fields = getFieldsFromData(sample, headerRow);

  // convert each column in place according to its detected type
  fields.forEach(function (field, fieldIdx) {
    parseCsvDataByFieldType(rows, field, fieldIdx);
  });

  return { fields: fields, rows: rows };
}
/**
 * Build a small sample of row objects to run type analysis on.
 * For every column, the first `sampleCount` values that are neither null nor
 * undefined are collected (in row order); when a column runs out of such
 * values the remaining sample slots are filled with null.
 *
 * @param {Object} _ref
 * @param {array} _ref.fields - column names, positioned by column index
 * @param {array} _ref.allData - rows, each an array of cell values
 * @param {number} [_ref.sampleCount=50] - maximum number of sample rows
 * @returns {array} sample rows, each an object keyed by column name
 */
function getSampleForTypeAnalyze(_ref) {
  var fields = _ref.fields;
  var allData = _ref.allData;
  var sampleCount = _ref.sampleCount === undefined ? 50 : _ref.sampleCount;

  var total = Math.min(sampleCount, allData.length);
  var sample = (0, _d3Array.range)(0, total, 1).map(function () {
    return {};
  });

  fields.forEach(function (field, fieldIdx) {
    // position of the next data row to inspect for this column
    var cursor = 0;

    for (var slot = 0; slot < total; slot++) {
      // skip rows holding no usable value for this column
      while (cursor < allData.length && !(0, _dataUtils.notNullorUndefined)(allData[cursor][fieldIdx])) {
        cursor++;
      }

      if (cursor < allData.length) {
        sample[slot][field] = allData[cursor][fieldIdx];
        cursor++;
      } else {
        // data pool depleted
        sample[slot][field] = null;
      }
    }
  });

  return sample;
}
/**
 * Replace, in place, every empty or null-like cell with null.
 * A cell is replaced when it is falsy or equal to one of `CSV_NULLS`.
 *
 * @param {array} rows - array of rows, each an array of cell values
 * @returns {void}
 */
function cleanUpFalsyCsvValue(rows) {
  for (var r = 0; r < rows.length; r++) {
    var row = rows[r];
    for (var c = 0; c < row.length; c++) {
      // the analyzer sets a field to 'string' as soon as it sees empty
      // values, and d3.csv parses empty cells as '' — so store them as null
      // TODO: create warning when detecting `CSV_NULLS` in the data
      var cell = row[c];
      var isMissing = !cell || CSV_NULLS.includes(cell);
      if (isMissing) {
        row[c] = null;
      }
    }
  }
}
/**
 * Convert, in place, column `i` of every row from its raw CSV string to the
 * JavaScript value matching `field.type`:
 * - real      -> parseFloat
 * - integer   -> parseInt (radix 10)
 * - boolean   -> true for 'true', 'True' or '1', false otherwise
 * - timestamp -> Number, only when `field.format` is a unix format ('x'/'X');
 *                other timestamp formats are left as strings
 * Any other field type is left untouched.
 *
 * Cells that are null or undefined are skipped. Undefined cells occur when a
 * row has fewer columns than the header; converting them would store NaN (or
 * `false` for booleans) in place of a missing value.
 *
 * @param {array} rows - array of rows, each an array of cell values
 * @param {object} field - field descriptor, reads `type` and `format`
 * @param {number} i - index of the column to convert
 * @returns {void}
 */
function parseCsvDataByFieldType(rows, field, i) {
  var types = _defaultSettings.ALL_FIELD_TYPES;
  var unixFormat = ['x', 'X'];

  // pick the converter once per column instead of once per cell
  var parse = null;
  if (field.type === types.real) {
    parse = function (value) {
      return parseFloat(value);
    };
  } else if (field.type === types.timestamp) {
    // TODO: timestamp can be either '1495827326' or '2016-03-10 11:20'
    // if it's '1495827326' we pass it to int
    if (unixFormat.includes(field.format)) {
      parse = function (value) {
        return Number(value);
      };
    }
  } else if (field.type === types.integer) {
    parse = function (value) {
      return parseInt(value, 10);
    };
  } else if (field.type === types.boolean) {
    // 0 and 1 only field can also be boolean
    parse = function (value) {
      return value === 'true' || value === 'True' || value === '1';
    };
  }

  if (!parse) {
    return;
  }

  rows.forEach(function (row) {
    var value = row[i];
    if (value !== null && value !== undefined) {
      row[i] = parse(value);
    }
  });
}
/**
 * Build the ordered list of field descriptors for a table.
 * Column types are detected by the type analyzer on `data`; each descriptor
 * holds the (de-duplicated) field name, the detected format, a 1-based
 * `tableFieldIndex` and the analyzer type mapped to a local field type.
 * A column the analyzer returned no metadata for gets an undefined format and
 * whatever `analyzerTypeToFieldType(undefined)` returns as type.
 *
 * @param {array} data - row objects keyed by field name, used for detection
 * @param {array} fieldOrder - field names in column order
 * @returns {array} formatted fields, in the order of `fieldOrder`
 */
function getFieldsFromData(data, fieldOrder) {
  // add a check for epoch timestamp
  // The rule regex deliberately has no `g` flag: a global regex keeps
  // `lastIndex` between `test()` calls, so a rule object reused across
  // several column names could miss a match right after a successful one.
  // NOTE(review): presumably the analyzer tests this rule against every
  // column name — confirm against type-analyzer.
  var metadata = _typeAnalyzer.Analyzer.computeColMeta(data, [{ regex: /.*geojson|all_points/, dataType: 'GEOMETRY' }]);

  var fieldByIndex = renameDuplicateFields(fieldOrder).fieldByIndex;

  return fieldOrder.map(function (field, index) {
    // metadata is keyed by the original column name, not the renamed one
    var fieldMeta = metadata.find(function (m) {
      return m.key === field;
    }) || {};

    return {
      name: fieldByIndex[index],
      format: fieldMeta.format,
      // need this for mapbuilder conversion: filter type detection
      // category,
      tableFieldIndex: index + 1,
      type: analyzerTypeToFieldType(fieldMeta.type)
    };
  });
}
/**
 * Make a list of field names unique.
 * A name that was already taken receives a `-<counter>` suffix, the counter
 * being the first integer (starting at 0) that yields an unused name.
 *
 * @param {array} fieldOrder - field names in column order
 * @returns {Object} `allNames`: the resulting unique names in order,
 * `fieldByIndex`: map from original field index to its unique name
 */
function renameDuplicateFields(fieldOrder) {
  var result = { allNames: [], fieldByIndex: {} };

  fieldOrder.forEach(function (field, idx) {
    var uniqueName = field;

    if (result.allNames.includes(field)) {
      // look for the first free "<field>-<n>" candidate
      var suffix = 0;
      uniqueName = field + '-' + suffix;
      while (result.allNames.includes(uniqueName)) {
        suffix++;
        uniqueName = field + '-' + suffix;
      }
    }

    result.fieldByIndex[idx] = uniqueName;
    result.allNames.push(uniqueName);
  });

  return result;
}
/**
 * Translate a type reported by the type analyzer into the matching entry of
 * ALL_FIELD_TYPES. Types without a dedicated mapping fall back to the string
 * field type and a console warning is emitted.
 *
 * @param {string} aType - one of the analyzer DATA_TYPES
 * @returns {string} corresponding type in ALL_FIELD_TYPES
 */
/* eslint-disable complexity */
function analyzerTypeToFieldType(aType) {
  var analyzerTypes = _typeAnalyzer.DATA_TYPES;
  var fieldTypes = _defaultSettings.ALL_FIELD_TYPES;

  // comparisons are strict and evaluated in this exact order
  if (aType === analyzerTypes.DATE) {
    return fieldTypes.date;
  }
  if (aType === analyzerTypes.TIME || aType === analyzerTypes.DATETIME) {
    return fieldTypes.timestamp;
  }
  if (aType === analyzerTypes.NUMBER || aType === analyzerTypes.FLOAT) {
    return fieldTypes.real;
  }
  if (aType === analyzerTypes.INT) {
    return fieldTypes.integer;
  }
  if (aType === analyzerTypes.BOOLEAN) {
    return fieldTypes.boolean;
  }
  if (aType === analyzerTypes.GEOMETRY || aType === analyzerTypes.GEOMETRY_FROM_STRING || aType === analyzerTypes.PAIR_GEOMETRY_FROM_STRING) {
    return fieldTypes.geojson;
  }
  if (aType === analyzerTypes.STRING || aType === analyzerTypes.CITY || aType === analyzerTypes.ZIPCODE) {
    return fieldTypes.string;
  }

  // TODO: un recognized types
  // CURRENCY PERCENT NONE
  _window.console.warn('Unsupported analyzer type: ' + aType);
  return fieldTypes.string;
}
/* eslint-enable complexity */
/*
 * Process rawData where each row is an object.
 * The keys of the first row define the columns and their order; every row is
 * flattened to an array of values following that order, and the field
 * descriptors are computed from the raw objects.
 * Returns null when rawData is empty.
 */
function processRowObject(rawData) {
  if (!rawData.length) {
    return null;
  }

  var keys = Object.keys(rawData[0]);

  var rows = rawData.map(function (datum) {
    var row = [];
    for (var k = 0; k < keys.length; k++) {
      row.push(datum[keys[k]]);
    }
    return row;
  });

  return {
    fields: getFieldsFromData(rawData, keys),
    rows: rows
  };
}
/**
 * Turn a geojson input into `{fields, rows}` via `processRowObject`.
 * Every feature that has a geometry becomes one row object made of the
 * feature itself (under `_geojson`) plus its properties; properties missing
 * on a feature are filled with null so all rows share the same keys.
 *
 * @param {Object} rawData - geojson, normalized with geojson-normalize
 * @returns {Object|null} result of `processRowObject`, or null when the
 * input cannot be normalized to an object with a `features` array
 */
function processGeojson(rawData) {
  var normalized = (0, _geojsonNormalize2.default)(rawData);
  var hasFeatures = Boolean(normalized) && Array.isArray(normalized.features);

  if (!hasFeatures) {
    // fail to normalize geojson
    return null;
  }

  // one row object per feature carrying a geometry
  var allData = [];
  normalized.features.forEach(function (feature) {
    if (feature.geometry) {
      // keep the whole feature in the _geojson field
      allData.push((0, _extends3.default)({ _geojson: feature }, feature.properties || {}));
    }
  });

  // union of all keys, in order of first appearance
  var fields = [];
  allData.forEach(function (datum) {
    Object.keys(datum).forEach(function (key) {
      if (fields.indexOf(key) === -1) {
        fields.push(key);
      }
    });
  });

  // make sure each row has exactly the same fields
  allData.forEach(function (datum) {
    fields.forEach(function (name) {
      if (!(name in datum)) {
        datum[name] = null;
      }
    });
  });

  return processRowObject(allData);
}
/**
 * Serialize table data to csv text for export.
 * The first output row holds the field names. Truthy cells of a column whose
 * field name is listed in `GEOJSON_FIELDS.geojson` are written as JSON
 * strings; every other cell is passed to the csv formatter unchanged.
 *
 * @param {array} data - rows, each an array of cell values
 * @param {array} fields - field descriptors, reads `name`
 * @returns {string} output of d3-dsv `csvFormatRows`
 */
function formatCsv(data, fields) {
  var header = fields.map(function (field) {
    return field.name;
  });
  var table = [header];

  data.forEach(function (row) {
    var line = row.map(function (cell, colIdx) {
      // parse geojson object as string
      var isGeojsonCell = cell && _defaultSettings.GEOJSON_FIELDS.geojson.includes(fields[colIdx].name);
      return isGeojsonCell ? JSON.stringify(cell) : cell;
    });
    table.push(line);
  });

  return (0, _d3Dsv.csvFormatRows)(table);
}
/**
 * Validate a dataset of the expected shape
 *   {
 *     fields: [],
 *     rows: []
 *   }
 * and make sure every field has a name, a known type and a format.
 *
 * - When `data`, `data.fields` or `data.rows` is invalid, a warning is logged
 *   and null is returned.
 * - A field without a name is given the name `column_<index>`; this mutates
 *   the field object that was passed in.
 * - When every field is an object with a known type and truthy type, format
 *   and name, the input `rows` and `fields` are returned as they are.
 * - Otherwise type and format are re-detected for all fields from a sample of
 *   the rows, and copies of the fields carrying the detected values are
 *   returned.
 *
 * Problems are reported through `console.warn`. The previous
 * `assert('message')` calls passed the message as the asserted value, which is
 * always truthy, so they never reported anything.
 *
 * @param {Object} data
 * @param {Array} data.fields - field descriptors
 * @param {Array} data.rows - rows, each an array of cell values
 * @returns {{rows: Array, fields: Array}|null}
 */
function validateInputData(data) {
  // TODO: add test
  var warn = function warn(message) {
    _window.console.warn(message);
  };

  if (!data) {
    warn('receiveVisData: data cannot be null');
    return null;
  }
  if (!Array.isArray(data.fields)) {
    warn('receiveVisData: expect data.fields to be an array');
    return null;
  }
  if (!Array.isArray(data.rows)) {
    warn('receiveVisData: expect data.rows to be an array');
    return null;
  }

  var fields = data.fields;
  var rows = data.rows;

  // check if all fields has name, format and type
  // (`every` stops at the first invalid field, later ones are not visited)
  var allValid = fields.every(function (f, i) {
    if ((typeof f === 'undefined' ? 'undefined' : (0, _typeof3.default)(f)) !== 'object') {
      warn('fields needs to be an array of object, but find ' + f);
      return false;
    }

    if (!f.name) {
      warn('field.name is required but missing in field ' + JSON.stringify(f));
      // assign a name
      f.name = 'column_' + i;
    }

    if (!_defaultSettings.ALL_FIELD_TYPES[f.type]) {
      warn('unknown field type ' + f.type);
      return false;
    }

    return f.type && f.format && f.name;
  });

  if (allValid) {
    return { rows: rows, fields: fields };
  }

  // if any field has missing type, recalculate it for everyone
  // because we simply lost faith in humanity
  var fieldOrder = fields.map(function (f) {
    return f.name;
  });
  var sampleData = getSampleForTypeAnalyze({ fields: fieldOrder, allData: rows });
  var meta = getFieldsFromData(sampleData, fieldOrder);

  var updatedFields = fields.map(function (f, i) {
    return (0, _extends3.default)({}, f, {
      type: meta[i].type,
      format: meta[i].format
    });
  });

  return { fields: updatedFields, rows: rows };
}
// Default export: an object bundling a subset of the functions that are also
// exported by name. getSampleForTypeAnalyze, renameDuplicateFields, formatCsv
// and validateInputData are not part of it and are only reachable through
// their named exports.
exports.default = {
processGeojson: processGeojson,
processCsvData: processCsvData,
processRowObject: processRowObject,
analyzerTypeToFieldType: analyzerTypeToFieldType,
getFieldsFromData: getFieldsFromData,
parseCsvDataByFieldType: parseCsvDataByFieldType
};
//# sourceMappingURL=data:application/json;charset=utf-8;base64,
;