kepler.gl
kepler.gl is a WebGL-based application for visualizing large-scale location data in the browser.
// Copyright (c) 2018 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
import {csvParseRows, csvFormatRows} from 'd3-dsv';
import {range} from 'd3-array';
import {console as globalConsole} from 'global/window';
import {Analyzer, DATA_TYPES as AnalyzerDATA_TYPES} from 'type-analyzer';
import normalize from '@mapbox/geojson-normalize';
import {ALL_FIELD_TYPES, GEOJSON_FIELDS} from 'constants/default-settings';
import {notNullorUndefined} from 'utils/data-utils';
// if any of these values occurs in the csv, parse it to null
const CSV_NULLS = ['', 'null', 'NULL', 'Null', 'NaN'];
export function processCsvData(rawData) {
  // here we assume the first row of the uploaded csv file
  // contains the column names
  // TODO: add an alert at csv upload to remind users to define column names in the first row
const [headerRow, ...rows] = csvParseRows(rawData);
if (!rows.length || !headerRow) {
    // looks like an empty file;
    // return null and handle it in one place downstream
return null;
}
cleanUpFalsyCsvValue(rows);
  // No need to run type detection on every data point;
  // take a sample of non-null values to run the analyzer on
const sample = getSampleForTypeAnalyze({fields: headerRow, allData: rows});
const fields = getFieldsFromData(sample, headerRow);
fields.forEach(parseCsvDataByFieldType.bind(null, rows));
return {fields, rows};
}
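// Usage sketch (illustrative; the inline csv string is hypothetical and the
// exact detected types/formats depend on type-analyzer):
//
//   const {fields, rows} = processCsvData('id,value\n1,3.5\n2,4.2');
//   // fields -> [{name: 'id', type: 'integer', ...}, {name: 'value', type: 'real', ...}]
//   // rows -> [[1, 3.5], [2, 4.2]]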
/**
 * Get a sample of rows to run type analysis on
 *
 * @param {array} fields - an array of field names
 * @param {array} allData - array of data rows
 * @param {number} [sampleCount=50] - max number of rows to sample
 * @returns {array} sample rows, each keyed by field name
 */
export function getSampleForTypeAnalyze({fields, allData, sampleCount = 50}) {
const total = Math.min(sampleCount, allData.length);
  const sample = range(0, total, 1).map(() => ({}));
// collect sample data for each field
fields.forEach((field, fieldIdx) => {
// data counter
let i = 0;
// sample counter
let j = 0;
while (j < total) {
if (i >= allData.length) {
        // the data pool is depleted; fill the rest of the sample with null
sample[j][field] = null;
j++;
} else if (notNullorUndefined(allData[i][fieldIdx])) {
sample[j][field] = allData[i][fieldIdx];
j++;
i++;
} else {
i++;
}
}
});
return sample;
}
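// Sampling sketch (illustrative values): null entries are skipped so the
// analyzer sees actual values whenever the data pool allows
//
//   getSampleForTypeAnalyze({fields: ['a'], allData: [[null], [1], [2]], sampleCount: 2});
//   // -> [{a: 1}, {a: 2}]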
function cleanUpFalsyCsvValue(rows) {
for (let i = 0; i < rows.length; i++) {
for (let j = 0; j < rows[i].length; j++) {
      // the analyzer will set a field to 'string' if it contains empty values,
      // which d3.csv parses as ''
      // here we parse empty data as null
      // TODO: create a warning when detecting `CSV_NULLS` in the data
if (!rows[i][j] || CSV_NULLS.includes(rows[i][j])) {
rows[i][j] = null;
}
}
}
}
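// Mutation sketch (illustrative): rows are cleaned in place
//
//   const rows = [['1', 'NULL'], ['', '2']];
//   cleanUpFalsyCsvValue(rows);
//   // rows -> [['1', null], [null, '2']]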
/**
 * Parse csv data values in place based on field type
 *
 * @param {array} rows - data rows
 * @param {object} field - the field definition
 * @param {number} i - index of the field
 * @returns {void}
 */
export function parseCsvDataByFieldType(rows, field, i) {
const unixFormat = ['x', 'X'];
rows.forEach(row => {
if (row[i] !== null) {
switch (field.type) {
case ALL_FIELD_TYPES.real:
row[i] = parseFloat(row[i]);
break;
        // TODO: a timestamp can be either '1495827326' or '2016-03-10 11:20'
        // if it is in unix format ('x' or 'X'), cast it to a number
case ALL_FIELD_TYPES.timestamp:
row[i] = unixFormat.includes(field.format) ? Number(row[i]) : row[i];
break;
case ALL_FIELD_TYPES.integer:
row[i] = parseInt(row[i], 10);
break;
case ALL_FIELD_TYPES.boolean:
          // a field containing only 0 and 1 can also be boolean
row[i] = row[i] === 'true' || row[i] === 'True' || row[i] === '1';
break;
default:
break;
}
}
});
}
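// Parsing sketch (illustrative): real fields are parsed with parseFloat
//
//   const rows = [['1.5'], ['2']];
//   parseCsvDataByFieldType(rows, {type: ALL_FIELD_TYPES.real}, 0);
//   // rows -> [[1.5], [2]]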
/**
 * Get field metadata (name, type, format) from sample data
 *
 * @param {array} data - array of sample rows keyed by field name
 * @param {array} fieldOrder - array of field names in column order
 * @returns {array} formatted fields
 */
export function getFieldsFromData(data, fieldOrder) {
  // pass a custom regex rule so columns named like *geojson or all_points are detected as GEOMETRY
const metadata = Analyzer.computeColMeta(data, [
{regex: /.*geojson|all_points/g, dataType: 'GEOMETRY'}
]);
const {fieldByIndex} = renameDuplicateFields(fieldOrder);
return fieldOrder.reduce((orderedArray, field, index) => {
const name = fieldByIndex[index];
const fieldMeta = metadata.find(m => m.key === field);
const {type, format} = fieldMeta || {};
orderedArray[index] = {
name,
format,
// need this for mapbuilder conversion: filter type detection
// category,
tableFieldIndex: index + 1,
type: analyzerTypeToFieldType(type)
};
return orderedArray;
}, []);
}
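// Detection sketch (illustrative; the resulting type/format values depend on
// type-analyzer's column metadata):
//
//   getFieldsFromData([{a: '1', b: 'x'}, {a: '2', b: 'y'}], ['a', 'b']);
//   // -> [{name: 'a', type: 'integer', tableFieldIndex: 1, ...},
//   //     {name: 'b', type: 'string', tableFieldIndex: 2, ...}]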
/**
 * Pass in an array of field names, rename duplicated ones,
 * and return a map from field index to new name
 *
 * @param {array} fieldOrder
 * @returns {Object} {allNames, fieldByIndex} - all field names and new name by index
 */
export function renameDuplicateFields(fieldOrder) {
return fieldOrder.reduce(
(accu, field, i) => {
const {allNames} = accu;
let fieldName = field;
// add a counter to duplicated names
if (allNames.includes(field)) {
let counter = 0;
while (allNames.includes(`${field}-${counter}`)) {
counter++;
}
fieldName = `${field}-${counter}`;
}
accu.fieldByIndex[i] = fieldName;
accu.allNames.push(fieldName);
return accu;
},
{allNames: [], fieldByIndex: {}}
);
}
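// Renaming sketch:
//
//   renameDuplicateFields(['a', 'a', 'b']);
//   // -> {allNames: ['a', 'a-0', 'b'], fieldByIndex: {0: 'a', 1: 'a-0', 2: 'b'}}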
/**
* Map Analyzer types to local field types
*
* @param {string} aType
* @returns {string} corresponding type in ALL_FIELD_TYPES
*/
/* eslint-disable complexity */
export function analyzerTypeToFieldType(aType) {
const {
DATE,
TIME,
DATETIME,
NUMBER,
INT,
FLOAT,
BOOLEAN,
STRING,
CITY,
GEOMETRY,
GEOMETRY_FROM_STRING,
ZIPCODE,
PAIR_GEOMETRY_FROM_STRING
} = AnalyzerDATA_TYPES;
  // TODO: unrecognized types: CURRENCY, PERCENT, NONE
switch (aType) {
case DATE:
return ALL_FIELD_TYPES.date;
case TIME:
case DATETIME:
return ALL_FIELD_TYPES.timestamp;
case NUMBER:
case FLOAT:
return ALL_FIELD_TYPES.real;
case INT:
return ALL_FIELD_TYPES.integer;
case BOOLEAN:
return ALL_FIELD_TYPES.boolean;
case GEOMETRY:
case GEOMETRY_FROM_STRING:
case PAIR_GEOMETRY_FROM_STRING:
return ALL_FIELD_TYPES.geojson;
case STRING:
case CITY:
case ZIPCODE:
return ALL_FIELD_TYPES.string;
default:
globalConsole.warn(`Unsupported analyzer type: ${aType}`);
return ALL_FIELD_TYPES.string;
}
}
/* eslint-enable complexity */
/**
 * Process rawData where each row is an object
 *
 * @param {array} rawData - an array of row objects
 * @returns {{fields: array, rows: array}|null} fields and rows, or null for empty input
 */
export function processRowObject(rawData) {
if (!rawData.length) {
return null;
}
const keys = Object.keys(rawData[0]);
const rows = rawData.map(d => keys.map(key => d[key]));
const fields = getFieldsFromData(rawData, keys);
return {
fields,
rows
};
}
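// Row-object sketch (illustrative; detected field types depend on type-analyzer):
//
//   processRowObject([{a: 1, b: 'x'}, {a: 2, b: 'y'}]);
//   // -> {fields: [...], rows: [[1, 'x'], [2, 'y']]}

/**
 * Parse a geojson object into fields and rows; each feature with geometry
 * becomes one row, with the raw feature kept in a `_geojson` field
 *
 * @param {Object} rawData - raw geojson input
 * @returns {{fields: array, rows: array}|null}
 */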
export function processGeojson(rawData) {
const normalizedGeojson = normalize(rawData);
if (!normalizedGeojson || !Array.isArray(normalizedGeojson.features)) {
    // failed to normalize the geojson input
return null;
}
// getting all feature fields
const allData = normalizedGeojson.features.reduce((accu, f, i) => {
if (f.geometry) {
accu.push({
// add feature to _geojson field
_geojson: f,
...(f.properties || {})
});
}
return accu;
}, []);
  // get all the fields
const fields = allData.reduce((prev, curr) => {
Object.keys(curr).forEach(key => {
if (!prev.includes(key)) {
prev.push(key);
}
});
return prev;
}, []);
  // make sure each feature has the exact same fields
allData.forEach(d => {
fields.forEach(f => {
if (!(f in d)) {
d[f] = null;
}
});
});
return processRowObject(allData);
}
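// Geojson sketch (illustrative): the feature itself lands in `_geojson`,
// its properties become regular fields
//
//   processGeojson({
//     type: 'FeatureCollection',
//     features: [{
//       type: 'Feature',
//       geometry: {type: 'Point', coordinates: [0, 0]},
//       properties: {name: 'origin'}
//     }]
//   });
//   // -> {fields: [{name: '_geojson', ...}, {name: 'name', ...}],
//   //     rows: [[<feature>, 'origin']]}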
/**
 * Format data as a csv string on export
 *
 * @param {array} data - rows of data
 * @param {array} fields - field definitions
 * @returns {string} csv string
 */
export function formatCsv(data, fields) {
const columns = fields.map(f => f.name);
const formattedData = [columns];
  // stringify geojson objects before writing them to csv
  data.forEach(row => {
    formattedData.push(
      row.map((d, i) =>
        d && GEOJSON_FIELDS.geojson.includes(fields[i].name)
          ? JSON.stringify(d)
          : d
      )
    );
  });
return csvFormatRows(formattedData);
}
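// Export sketch (illustrative): plain values pass through, geojson cells are
// JSON-stringified so they survive the csv round trip
//
//   formatCsv([[1, 'x']], [{name: 'id'}, {name: 'label'}]);
//   // -> 'id,label\n1,x'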
/**
 * Validate input data; re-derive field types when they are missing or invalid
 *
 * @param {Object} data - expected to be {fields: Array, rows: Array}
 * @returns {{fields: Array, rows: Array}|null}
 */
export function validateInputData(data) {
// TODO: add test
/*
* expected input data format
* {
* fields: [],
* rows: []
* }
*/
let proceed = true;
  if (!data) {
    globalConsole.error('receiveVisData: data cannot be null');
    proceed = false;
  } else if (!Array.isArray(data.fields)) {
    globalConsole.error('receiveVisData: expect data.fields to be an array');
    proceed = false;
  } else if (!Array.isArray(data.rows)) {
    globalConsole.error('receiveVisData: expect data.rows to be an array');
    proceed = false;
  }
if (!proceed) {
return null;
}
const {fields, rows} = data;
  // check if all fields have name, format and type
const allValid = fields.every((f, i) => {
    if (typeof f !== 'object') {
      globalConsole.error(`fields needs to be an array of objects, but found ${f}`);
      return false;
    }
    if (!f.name) {
      globalConsole.error(
        `field.name is required but missing in field ${JSON.stringify(f)}`
      );
      // assign a name
      f.name = `column_${i}`;
    }
    if (!ALL_FIELD_TYPES[f.type]) {
      globalConsole.error(`unknown field type ${f.type}`);
return false;
}
return f.type && f.format && f.name;
});
if (allValid) {
return {rows, fields};
}
// if any field has missing type, recalculate it for everyone
// because we simply lost faith in humanity
const sampleData = getSampleForTypeAnalyze({fields: fields.map(f => f.name), allData: rows});
const fieldOrder = fields.map(f => f.name);
const meta = getFieldsFromData(sampleData, fieldOrder);
const updatedFields = fields.map((f, i) => ({
...f,
type: meta[i].type,
format: meta[i].format
}));
return {fields: updatedFields, rows};
}
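// Validation sketch (illustrative):
//
//   validateInputData(null); // -> null
//   validateInputData({fields: {}, rows: []}); // -> null (fields is not an array)
//   // fields with a missing or unknown type are re-derived from the rows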
export default {
processGeojson,
processCsvData,
processRowObject,
analyzerTypeToFieldType,
getFieldsFromData,
parseCsvDataByFieldType
};