// kepler.gl
// Version:
// kepler.gl is a WebGL-based application to visualize large-scale location data in the browser
// 548 lines (527 loc) • 70.1 kB
// JavaScript
"use strict";
var _interopRequireDefault = require("@babel/runtime/helpers/interopRequireDefault");
var _typeof = require("@babel/runtime/helpers/typeof");
Object.defineProperty(exports, "__esModule", {
value: true
});
exports.Processors = exports.PARSE_FIELD_VALUE_FROM_STRING = exports.DATASET_HANDLERS = exports.CSV_NULLS = void 0;
exports.arrowSchemaToFields = arrowSchemaToFields;
exports.getGeoArrowMetadataFromSchema = getGeoArrowMetadataFromSchema;
exports.parseCsvRowsByFieldType = parseCsvRowsByFieldType;
exports.parseRowsByFields = parseRowsByFields;
exports.processArrowBatches = processArrowBatches;
exports.processArrowTable = processArrowTable;
exports.processCsvData = processCsvData;
exports.processGeojson = processGeojson;
exports.processKeplerglDataset = processKeplerglDataset;
exports.processKeplerglJSON = processKeplerglJSON;
exports.processRowObject = processRowObject;
var _toConsumableArray2 = _interopRequireDefault(require("@babel/runtime/helpers/toConsumableArray"));
var _defineProperty2 = _interopRequireDefault(require("@babel/runtime/helpers/defineProperty"));
var arrow = _interopRequireWildcard(require("apache-arrow"));
var _d3Dsv = require("d3-dsv");
var _typeAnalyzer = require("type-analyzer");
var _geojsonNormalize = _interopRequireDefault(require("@mapbox/geojson-normalize"));
var _core = require("@loaders.gl/core");
var _wkt = require("@loaders.gl/wkt");
var _constants = require("@kepler.gl/constants");
var _utils = require("@kepler.gl/utils");
var _commonUtils = require("@kepler.gl/common-utils");
var _schemas = require("@kepler.gl/schemas");
/**
 * Babel interop helper: lazily creates two WeakMap caches and returns one of them.
 * On first use the function replaces itself with a lookup closure, so the caches
 * are created only once.
 * @param e when truthy the second cache is returned, otherwise the first
 * @returns a WeakMap, or null when WeakMap is not available
 */
function _getRequireWildcardCache(e) {
  if (typeof WeakMap !== "function") {
    return null;
  }
  var defaultCache = new WeakMap();
  var flaggedCache = new WeakMap();
  // Swap in the memoized selector so later calls reuse the same two caches.
  _getRequireWildcardCache = function _getRequireWildcardCache(e) {
    if (e) {
      return flaggedCache;
    }
    return defaultCache;
  };
  return _getRequireWildcardCache(e);
}
/**
 * Babel interop helper used for `import * as x` style requires.
 * - Returns `e` unchanged when `r` is falsy and `e.__esModule` is truthy.
 * - Wraps `e` as `{default: e}` when it is null or neither an object nor a function.
 * - Otherwise builds a null-prototype namespace object: every own property of `e`
 *   except "default" is copied (accessor properties are copied by descriptor when
 *   descriptors are available), `default` is set to `e` itself, and the result is
 *   stored in / served from the cache returned by `_getRequireWildcardCache(r)`.
 * @param e the required module value
 * @param r flag forwarded to `_getRequireWildcardCache`; also disables the `__esModule` shortcut when truthy
 * @returns the module itself, a `{default}` wrapper, or the generated namespace object
 */
function _interopRequireWildcard(e, r) { if (!r && e && e.__esModule) return e; if (null === e || "object" != _typeof(e) && "function" != typeof e) return { "default": e }; var t = _getRequireWildcardCache(r); if (t && t.has(e)) return t.get(e); var n = { __proto__: null }, a = Object.defineProperty && Object.getOwnPropertyDescriptor; for (var u in e) if ("default" !== u && {}.hasOwnProperty.call(e, u)) { var i = a ? Object.getOwnPropertyDescriptor(e, u) : null; i && (i.get || i.set) ? Object.defineProperty(n, u, i) : n[u] = e[u]; } return n["default"] = e, t && t.set(e, n), n; }
/**
 * Babel helper: lists the own keys of an object, string keys first, then symbols.
 * @param e object to inspect
 * @param r when truthy, only enumerable symbol keys are included
 * @returns array of own enumerable string keys followed by own symbol keys
 */
function ownKeys(e, r) {
  var keys = Object.keys(e);
  if (!Object.getOwnPropertySymbols) {
    return keys;
  }
  var symbols = Object.getOwnPropertySymbols(e);
  if (r) {
    symbols = symbols.filter(function (sym) {
      return Object.getOwnPropertyDescriptor(e, sym).enumerable;
    });
  }
  keys.push.apply(keys, symbols);
  return keys;
}
/**
 * Babel helper implementing object spread onto the target `e`.
 * Every argument after the first is a source; null/undefined sources are treated as `{}`.
 * - Sources at odd argument positions are copied key by key (own enumerable string and
 *   enumerable symbol keys, via `ownKeys(..., true)`) using the defineProperty helper.
 * - Sources at even argument positions are copied by property descriptor, using
 *   `Object.defineProperties` when `Object.getOwnPropertyDescriptors` exists and a
 *   per-key `Object.defineProperty` fallback otherwise.
 * @param e target object, mutated in place
 * @returns the same target object `e`
 */
function _objectSpread(e) { for (var r = 1; r < arguments.length; r++) { var t = null != arguments[r] ? arguments[r] : {}; r % 2 ? ownKeys(Object(t), !0).forEach(function (r) { (0, _defineProperty2["default"])(e, r, t[r]); }) : Object.getOwnPropertyDescriptors ? Object.defineProperties(e, Object.getOwnPropertyDescriptors(t)) : ownKeys(Object(t)).forEach(function (r) { Object.defineProperty(e, r, Object.getOwnPropertyDescriptor(t, r)); }); } return e; } // SPDX-License-Identifier: MIT
// Copyright contributors to the kepler.gl project
// Pattern for cell values that are treated as missing data. A cell whose entire
// content is one of `null`, `NULL`, `Null`, `NaN`, `/N`, or the empty string
// matches; `cleanUpFalsyCsvValue` replaces such cells with `null`.
// The trailing empty alternatives in the group are what match the empty string.
var CSV_NULLS = exports.CSV_NULLS = /^(null|NULL|Null|NaN|\/N||)$/;
/**
 * Parse a JSON string without throwing.
 * @param str text to parse
 * @returns the parsed value, or null when `JSON.parse` throws
 */
function tryParseJsonString(str) {
  var parsed = null;
  try {
    parsed = JSON.parse(str);
  } catch (e) {
    // Not valid JSON: fall through and report null.
  }
  return parsed;
}
/**
 * Lookup table keyed by field type. Each entry provides:
 *  - `valid(value, field)`: true when `value` is already in its parsed form
 *  - `parse(value, field)`: converts a raw (string) cell into the parsed form
 * Entries are registered in the order boolean, integer, timestamp, real, object, array, h3.
 */
var PARSE_FIELD_VALUE_FROM_STRING = exports.PARSE_FIELD_VALUE_FROM_STRING = function () {
  var parsers = {};
  var register = function register(fieldType, handlers) {
    (0, _defineProperty2["default"])(parsers, fieldType, handlers);
  };
  var isEpochFormat = function isEpochFormat(field) {
    return ['x', 'X'].includes(field.format);
  };

  register(_constants.ALL_FIELD_TYPES["boolean"], {
    valid: function valid(d) {
      return typeof d === 'boolean';
    },
    // Only these four spellings parse to true; everything else is false.
    parse: function parse(d) {
      return d === 'true' || d === 'True' || d === 'TRUE' || d === '1';
    }
  });

  register(_constants.ALL_FIELD_TYPES.integer, {
    valid: function valid(d) {
      return parseInt(d, 10) === d;
    },
    parse: function parse(d) {
      return parseInt(d, 10);
    }
  });

  register(_constants.ALL_FIELD_TYPES.timestamp, {
    // Formats 'x' / 'X' are stored as numbers; every other format stays a string.
    valid: function valid(d, field) {
      if (isEpochFormat(field)) {
        return typeof d === 'number';
      }
      return typeof d === 'string';
    },
    parse: function parse(d, field) {
      if (isEpochFormat(field)) {
        return Number(d);
      }
      return d;
    }
  });

  register(_constants.ALL_FIELD_TYPES.real, {
    valid: function valid(d) {
      return parseFloat(d) === d;
    },
    // Note: parseFloat yields NaN for strings that do not start with a number.
    parse: parseFloat
  });

  register(_constants.ALL_FIELD_TYPES.object, {
    valid: _utils.isPlainObject,
    parse: tryParseJsonString
  });

  register(_constants.ALL_FIELD_TYPES.array, {
    valid: Array.isArray,
    parse: tryParseJsonString
  });

  register(_constants.ALL_FIELD_TYPES.h3, {
    valid: function valid(d) {
      return (0, _commonUtils.h3IsValid)(d);
    },
    // h3 ids are kept as-is.
    parse: function parse(d) {
      return d;
    }
  });

  return parsers;
}();
/**
 * Process csv data and produce a data object of the shape `{fields: [], rows: []}`.
 * The result can be wrapped in a `dataset` and passed to [`addDataToMap`](../actions/actions.md#adddatatomap).
 *
 * Accepted inputs:
 *  - a raw csv string: the first parsed row is used as the header
 *  - a non-empty array of rows: `header` is used when it is an array, otherwise the
 *    first row is taken as the header and removed from the data rows
 *
 * NOTE: when rows are passed as an array they are edited in place.
 *
 * @param rawData raw csv string, or an array of row arrays
 * @param header optional array of column names, only used when `rawData` is an array
 * @returns data object `{fields: [], rows: []}` that can be passed to addDataToMap
 * @throws Error when a csv string has fewer than two rows, or when the input is not usable
 * @public
 * @example
 * import {processCsvData} from 'kepler.gl/processors';
 *
 * const testData = `gps_data.utc_timestamp,gps_data.lat,gps_data.lng,gps_data.types,epoch,has_result,id,time,begintrip_ts_utc,begintrip_ts_local,date
 * 2016-09-17 00:09:55,29.9900937,31.2590542,driver_analytics,1472688000000,False,1,2016-09-23T00:00:00.000Z,2016-10-01 09:41:39+00:00,2016-10-01 09:41:39+00:00,2016-09-23
 * 2016-09-17 00:10:56,29.9927699,31.2461142,driver_analytics,1472688000000,False,2,2016-09-23T00:00:00.000Z,2016-10-01 09:46:37+00:00,2016-10-01 16:46:37+00:00,2016-09-23
 * 2016-09-17 00:11:56,29.9907261,31.2312742,driver_analytics,1472688000000,False,3,2016-09-23T00:00:00.000Z,,,2016-09-23
 * 2016-09-17 00:12:58,29.9870074,31.2175827,driver_analytics,1472688000000,False,4,2016-09-23T00:00:00.000Z,,,2016-09-23`
 *
 * const dataset = {
 *   info: {id: 'test_data', label: 'My Csv'},
 *   data: processCsvData(testData)
 * };
 *
 * dispatch(addDataToMap({
 *   datasets: [dataset],
 *   options: {centerMap: true, readOnly: true}
 * }));
 */
function processCsvData(rawData, header) {
  var headerRow;
  var rows;

  if (typeof rawData === 'string') {
    var parsed = (0, _d3Dsv.csvParseRows)(rawData);
    var hasHeaderAndData = Array.isArray(parsed) && parsed.length >= 2;
    if (!hasHeaderAndData) {
      // nothing besides (at most) a header row: treat the file as empty
      throw new Error('process Csv Data Failed: CSV is empty');
    }
    headerRow = parsed[0];
    rows = parsed.slice(1);
  } else if (Array.isArray(rawData) && rawData.length) {
    if (Array.isArray(header)) {
      headerRow = header;
      rows = rawData;
    } else {
      // rows were passed without a header: the first row carries the column names
      headerRow = rawData[0];
      rows = rawData.slice(1);
    }
  }

  if (!rows || !headerRow) {
    throw new Error('invalid input passed to processCsvData');
  }

  // Turn null-like strings into real nulls before any type detection happens.
  cleanUpFalsyCsvValue(rows);

  // Type detection runs on a sample of the data rather than on every value.
  var sample = (0, _commonUtils.getSampleForTypeAnalyze)({
    fields: headerRow,
    rows: rows
  });
  var fields = (0, _commonUtils.getFieldsFromData)(sample, headerRow);

  return {
    fields: fields,
    rows: parseRowsByFields(rows, fields)
  };
}
/**
 * Convert the cells of every row according to the detected field types,
 * e.g. `'1'` -> `1`, `'True'` -> `true`. Rows are edited in place.
 * @param rows array of row arrays
 * @param fields detected fields, in column order
 * @returns the same `rows` array that was passed in
 */
function parseRowsByFields(rows, fields) {
  // Column holding the geojson feature (if any); passed on so the per-column parser can see it.
  var geojsonFieldIdx = fields.findIndex(function (field) {
    return field.name === '_geojson';
  });
  fields.forEach(function (field, fieldIdx) {
    parseCsvRowsByFieldType(rows, geojsonFieldIdx, field, fieldIdx);
  });
  return rows;
}
/**
 * Replace every string cell that matches `CSV_NULLS` with `null`, in place,
 * so that the type analyzer does not classify the column as string because of
 * placeholder or empty values.
 *
 * @param rows array of row arrays, mutated in place
 */
function cleanUpFalsyCsvValue(rows) {
  var nullPattern = new RegExp(CSV_NULLS, 'g');
  var rowIdx = 0;
  while (rowIdx < rows.length) {
    var row = rows[rowIdx];
    for (var colIdx = 0; colIdx < row.length; colIdx++) {
      var cell = row[colIdx];
      // Only strings can be null placeholders; other values are left untouched.
      // TODO: create a warning when a `CSV_NULLS` value is detected in the data
      if (typeof cell !== 'string') {
        continue;
      }
      if (cell.match(nullPattern)) {
        row[colIdx] = null;
      }
    }
    rowIdx++;
  }
}
/**
 * Parse the values of one column according to its detected field type.
 * Rows are edited in place.
 *
 * The column is left untouched when there is no parser for `field.type`, when the
 * column has no non-null value, or when the first non-null value is already in its
 * parsed form (`parser.valid`).
 *
 * Missing cells (`null` or `undefined`, e.g. from ragged csv rows or row objects that
 * lack a key) are skipped, so they are not turned into `NaN`, `false` or `null` by
 * the parsers.
 *
 * @param rows array of row arrays, mutated in place
 * @param geoFieldIdx index of the `_geojson` column, or -1 when there is none
 * @param field field description; `field.type` selects the parser
 * @param i column index of `field` within each row
 */
function parseCsvRowsByFieldType(rows, geoFieldIdx, field, i) {
  var parser = PARSE_FIELD_VALUE_FROM_STRING[field.type];
  if (!parser) {
    return;
  }
  // Probe the first present value: if it is already parsed, so is the whole column.
  var first = rows.find(function (r) {
    return (0, _commonUtils.notNullorUndefined)(r[i]);
  });
  if (!first || parser.valid(first[i], field)) {
    return;
  }
  rows.forEach(function (row) {
    // Same presence test as the probe above, so undefined cells stay untouched.
    if (!(0, _commonUtils.notNullorUndefined)(row[i])) {
      return;
    }
    // parse string value based on field type
    row[i] = parser.parse(row[i], field);
    // Keep the feature's own properties in sync with the parsed column value.
    if (geoFieldIdx > -1 && (0, _utils.isPlainObject)(row[geoFieldIdx]) && (0, _utils.hasOwnProperty)(row[geoFieldIdx], 'properties')) {
      row[geoFieldIdx].properties[field.name] = row[i];
    }
  });
}
/* eslint-enable complexity */
/**
 * Process data where each row is an object, output can be passed to [`addDataToMap`](../actions/actions.md#adddatatomap)
 * NOTE: This function may mutate input.
 *
 * Column names are taken from the keys of the first row object; each row is then
 * flattened to an array of values in that key order and handed to `processCsvData`.
 *
 * @param rawData an array of row object, each object should have the same number of keys
 * @returns dataset containing `fields` and `rows`; `null` when `rawData` is not an array,
 * and `{fields: [], rows: []}` when it is an empty array
 * @public
 * @example
 * import {addDataToMap} from 'kepler.gl/actions';
 * import {processRowObject} from 'kepler.gl/processors';
 *
 * const data = [
 *  {lat: 31.27, lng: 127.56, value: 3},
 *  {lat: 31.22, lng: 126.26, value: 1}
 * ];
 *
 * dispatch(addDataToMap({
 *  datasets: {
 *    info: {label: 'My Data', id: 'my_data'},
 *    data: processRowObject(data)
 *  }
 * }));
 */
function processRowObject(rawData) {
  if (!Array.isArray(rawData)) {
    return null;
  } else if (!rawData.length) {
    // data is empty
    return {
      fields: [],
      rows: []
    };
  }
  var keys = Object.keys(rawData[0]); // [lat, lng, value]
  var rows = rawData.map(function (d) {
    return keys.map(function (key) {
      return d[key];
    });
  }); // [[31.27, 127.56, 3]]
  // Row objects can still contain null placeholders such as `Null` or `NaN`.
  // `processCsvData` runs `cleanUpFalsyCsvValue` on these rows itself, so no
  // separate clean-up pass over the whole table is needed here.
  return processCsvData(rows, keys);
}
/**
 * Process GeoJSON [`FeatureCollection`](http://wiki.geojson.org/GeoJSON_draft_version_6#FeatureCollection),
 * output a data object with `{fields: [], rows: []}`.
 * The data object can be wrapped in a `dataset` and passed to [`addDataToMap`](../actions/actions.md#adddatatomap)
 * NOTE: This function may mutate input: properties missing on a feature are added
 * to that feature's `properties` with a `null` value.
 *
 * Features without a geometry are skipped. Every remaining feature becomes one row
 * with the feature itself under `_geojson` plus one column per property name found
 * on any feature.
 *
 * @param rawData raw geojson feature collection
 * @returns dataset containing `fields` and `rows`
 * @throws Error when the input cannot be normalized into a feature collection
 * @public
 * @example
 * import {addDataToMap} from 'kepler.gl/actions';
 * import {processGeojson} from 'kepler.gl/processors';
 *
 * const geojson = {
 * 	"type" : "FeatureCollection",
 * 	"features" : [{
 * 		"type" : "Feature",
 * 		"properties" : {
 * 			"capacity" : "10",
 * 			"type" : "U-Rack"
 * 		},
 * 		"geometry" : {
 * 			"type" : "Point",
 * 			"coordinates" : [ -71.073283, 42.417500 ]
 * 		}
 * 	}]
 * };
 *
 * dispatch(addDataToMap({
 *  datasets: {
 *    info: {
 *      label: 'Sample Taxi Trips in New York City',
 *      id: 'test_trip_data'
 *    },
 *    data: processGeojson(geojson)
 *  }
 * }));
 */
function processGeojson(rawData) {
  var normalizedGeojson = (0, _geojsonNormalize["default"])(rawData);
  if (!normalizedGeojson || !Array.isArray(normalizedGeojson.features)) {
    throw new Error("Read File Failed: File is not a valid GeoJSON. Read more about [supported file format](".concat(_constants.GUIDES_FILE_FORMAT_DOC, ")"));
  }
  // getting all feature fields
  var allDataRows = [];
  for (var i = 0; i < normalizedGeojson.features.length; i++) {
    var f = normalizedGeojson.features[i];
    if (f.geometry) {
      allDataRows.push(_objectSpread({
        // add feature to _geojson field
        _geojson: f
      }, f.properties || {}));
    }
  }
  // Collect every field name in first-seen order. The Set gives constant-time
  // membership checks so this stays linear in the number of cells.
  var fields = [];
  var seenFields = new Set();
  allDataRows.forEach(function (row) {
    Object.keys(row).forEach(function (key) {
      if (!seenFields.has(key)) {
        seenFields.add(key);
        fields.push(key);
      }
    });
  });
  // Make sure each feature has exactly the same fields.
  // An own-property check is required here: `in` would also find names inherited
  // from Object.prototype (e.g. a property called `constructor` or `toString`),
  // leaving those cells unfilled and later read as the inherited function.
  var hasOwn = Object.prototype.hasOwnProperty;
  allDataRows.forEach(function (d) {
    fields.forEach(function (f) {
      if (!hasOwn.call(d, f)) {
        d[f] = null;
        if (d._geojson.properties) {
          d._geojson.properties[f] = null;
        }
      }
    });
  });
  return processRowObject(allDataRows);
}
/**
 * Process a saved kepler.gl json so it can be passed to [`addDataToMap`](../actions/actions.md#adddatatomap).
 * The json object should contain `datasets` and `config`.
 * @param rawData saved kepler.gl json
 * @param schema optional schema used to load the data; defaults to `KeplerGlSchema`
 * @returns whatever `schema.load(datasets, config)` returns, or `null` when `rawData` is falsy
 * @public
 * @example
 * import {addDataToMap} from 'kepler.gl/actions';
 * import {processKeplerglJSON} from 'kepler.gl/processors';
 *
 * dispatch(addDataToMap(processKeplerglJSON(keplerGlJson)));
 */
function processKeplerglJSON(rawData) {
  var customSchema = arguments[1];
  var schema = customSchema === undefined ? _schemas.KeplerGlSchema : customSchema;
  if (!rawData) {
    return null;
  }
  return schema.load(rawData.datasets, rawData.config);
}
/**
 * Parse a single dataset, or an array of datasets, saved using the kepler.gl schema.
 * @param rawData one saved dataset or an array of them
 * @param schema optional schema used to parse the data; defaults to `KeplerGlSchema`
 * @returns `null` when `rawData` is falsy or parsing yields nothing; otherwise the
 * parsed array when an array was passed in, or its first element for a single dataset
 */
function processKeplerglDataset(rawData) {
  var customSchema = arguments[1];
  var schema = customSchema === undefined ? _schemas.KeplerGlSchema : customSchema;
  if (!rawData) {
    return null;
  }
  var parsed = schema.parseSavedData((0, _commonUtils.toArray)(rawData));
  if (parsed) {
    // Mirror the input shape: array in -> array out, single dataset in -> single dataset out.
    return Array.isArray(rawData) ? parsed : parsed[0];
  }
  return null;
}
/**
 * Parse an arrow table and return a dataset.
 * Delegates to `processArrowBatches` with the record batches found at `arrowTable.data.batches`.
 *
 * @param arrowTable ArrowTable to parse, see loaders.gl/schema
 * @returns dataset containing `fields` and `rows` or null
 */
function processArrowTable(arrowTable) {
  var batches = arrowTable.data.batches;
  return processArrowBatches(batches);
}
/**
 * Extracts GeoArrow metadata from an Apache Arrow table schema.
 * For geoparquet files geoarrow metadata isn't present in fields, so the extra info is
 * read from the `geo` entry of the schema metadata, which is expected to be a JSON string
 * with a `columns` object.
 * Only columns whose `encoding` is `'WKB'` are reported.
 * Any error while reading or parsing is logged with `console.error` and not rethrown;
 * whatever was collected up to that point is returned.
 * @param table The Apache Arrow table to extract metadata from.
 * @returns An object mapping column names to their GeoArrow encoding type.
 */
function getGeoArrowMetadataFromSchema(table) {
  var geoArrowMetadata = {};
  try {
    var schemaMetadata = table.schema.metadata;
    var geoString = schemaMetadata === null || schemaMetadata === undefined ? undefined : schemaMetadata.get('geo');
    if (!geoString) {
      return geoArrowMetadata;
    }
    var columns = JSON.parse(geoString).columns;
    if (!columns) {
      return geoArrowMetadata;
    }
    var columnNames = Object.keys(columns);
    for (var idx = 0; idx < columnNames.length; idx++) {
      var columnName = columnNames[idx];
      var columnData = columns[columnName];
      var encoding = columnData === null || columnData === undefined ? undefined : columnData.encoding;
      if (encoding === 'WKB') {
        geoArrowMetadata[columnName] = _constants.GEOARROW_EXTENSIONS.WKB;
      }
      // TODO potentially there are other types but no datasets to test
    }
  } catch (error) {
    console.error('An error during arrow table schema metadata parsing');
  }
  return geoArrowMetadata;
}
/**
 * Converts an Apache Arrow table schema into an array of Kepler.gl field objects.
 *
 * For every schema field the type is resolved in this order:
 *  1. suggestion `'JSON'`: geojson (geometry stored as a string)
 *  2. suggestion `'GEOMETRY'`, or field metadata under `GEOARROW_METADATA_KEY` starting
 *     with `'geoarrow'`: geoarrow
 *  3. column listed in the schema-level `geo` metadata: geoarrow; the encoding is
 *     also written into the field's metadata
 *  4. suggestion `'BLOB'`: the first value is test-parsed as WKB; on success the field
 *     becomes geoarrow and its metadata is tagged as WKB, otherwise the arrow type is kept
 *  5. otherwise the arrow type is kept, except that a timestamp detected by the
 *     Kepler type analysis overrides type, analyzer type and format
 *
 * NOTE: cases 3 and 4 mutate `field.metadata` of the input schema.
 *
 * @param table The Apache Arrow table whose schema needs to be converted.
 * @param fieldTypeSuggestions Optional mapping of field names to suggested field types.
 * @returns An array of field objects suitable for Kepler.gl.
 */
function arrowSchemaToFields(table) {
  var fieldTypeSuggestions = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {};
  var headerRow = table.schema.fields.map(function (f) {
    return f.name;
  });
  var sample = (0, _commonUtils.getSampleForTypeAnalyzeArrow)(table, headerRow);
  var keplerFields = (0, _commonUtils.getFieldsFromData)(sample, headerRow);
  var geoArrowMetadata = getGeoArrowMetadataFromSchema(table);
  return table.schema.fields.map(function (field, fieldIndex) {
    var _field$metadata$get;
    var type = (0, _utils.arrowDataTypeToFieldType)(field.type);
    var analyzerType = (0, _utils.arrowDataTypeToAnalyzerDataType)(field.type);
    var format = '';
    // `geoArrowMetadata` is a plain object, so a direct lookup would also find names
    // inherited from Object.prototype (a column called `constructor`, `toString`, ...)
    // and wrongly classify that column as geoarrow. Only own entries count.
    var schemaGeoEncoding = Object.prototype.hasOwnProperty.call(geoArrowMetadata, field.name) ? geoArrowMetadata[field.name] : undefined;
    // geometry fields produced by DuckDB's st_asgeojson()
    if (fieldTypeSuggestions[field.name] === 'JSON') {
      type = _constants.ALL_FIELD_TYPES.geojson;
      analyzerType = _typeAnalyzer.DATA_TYPES.GEOMETRY_FROM_STRING;
    } else if (fieldTypeSuggestions[field.name] === 'GEOMETRY' || (_field$metadata$get = field.metadata.get(_constants.GEOARROW_METADATA_KEY)) !== null && _field$metadata$get !== void 0 && _field$metadata$get.startsWith('geoarrow')) {
      type = _constants.ALL_FIELD_TYPES.geoarrow;
      analyzerType = _typeAnalyzer.DATA_TYPES.GEOMETRY;
    } else if (schemaGeoEncoding) {
      var _field$metadata;
      type = _constants.ALL_FIELD_TYPES.geoarrow;
      analyzerType = _typeAnalyzer.DATA_TYPES.GEOMETRY;
      (_field$metadata = field.metadata) === null || _field$metadata === void 0 || _field$metadata.set(_constants.GEOARROW_METADATA_KEY, schemaGeoEncoding);
    } else if (fieldTypeSuggestions[field.name] === 'BLOB') {
      // When arrow wkb column saved to DuckDB as BLOB without any metadata, then queried back
      try {
        var _table$getChildAt;
        var data = (_table$getChildAt = table.getChildAt(fieldIndex)) === null || _table$getChildAt === void 0 ? void 0 : _table$getChildAt.get(0);
        if (data) {
          var binaryGeo = (0, _core.parseSync)(data, _wkt.WKBLoader);
          if (binaryGeo) {
            var _field$metadata2;
            type = _constants.ALL_FIELD_TYPES.geoarrow;
            analyzerType = _typeAnalyzer.DATA_TYPES.GEOMETRY;
            (_field$metadata2 = field.metadata) === null || _field$metadata2 === void 0 || _field$metadata2.set(_constants.GEOARROW_METADATA_KEY, _constants.GEOARROW_EXTENSIONS.WKB);
          }
        }
      } catch (error) {
        // ignore, not WKB: the column keeps the type derived from its arrow data type
      }
    } else {
      // TODO should we use Kepler getFieldsFromData instead
      // of arrowDataTypeToFieldType for all fields?
      var keplerField = keplerFields[fieldIndex];
      if (keplerField.type === _constants.ALL_FIELD_TYPES.timestamp) {
        type = keplerField.type;
        analyzerType = keplerField.analyzerType;
        format = keplerField.format;
      }
    }
    return _objectSpread(_objectSpread({}, field), {}, {
      name: field.name,
      id: field.name,
      displayName: field.name,
      format: format,
      fieldIdx: fieldIndex,
      type: type,
      analyzerType: analyzerType,
      // Reads this column's value for a row from a data container.
      valueAccessor: function valueAccessor(dc) {
        return function (d) {
          return dc.valueAt(d.index, fieldIndex);
        };
      },
      metadata: field.metadata
    });
  });
}
/**
 * Parse arrow batches returned from parseInBatches()
 *
 * The batches are combined into one arrow table. Rows are intentionally left empty:
 * the data is exposed column-wise through `cols` (one arrow vector per column).
 *
 * @param arrowBatches the arrow record batches to parse
 * @returns dataset containing `fields`, empty `rows`, `cols`, `metadata` and `arrowSchema`,
 * or null when no batches are given
 */
function processArrowBatches(arrowBatches) {
  if (arrowBatches.length === 0) {
    return null;
  }
  var arrowTable = new arrow.Table(arrowBatches);
  var fields = arrowSchemaToFields(arrowTable);
  var columnCount = arrowTable.numCols;
  var cols = [];
  for (var columnIdx = 0; columnIdx < columnCount; columnIdx++) {
    cols.push(arrowTable.getChildAt(columnIdx));
  }
  var schema = arrowTable.schema;
  return {
    fields: fields,
    // rows stay empty; the raw arrow vectors back a column-wise data container
    rows: [],
    cols: cols,
    metadata: schema.metadata,
    // Save original arrow schema, for better ingestion into DuckDB.
    // TODO consider returning arrowTable in cols, not an array of Vectors from arrowTable.
    arrowSchema: arrowTable.schema
  };
}
// Maps each dataset format constant to the processor function that handles it.
var DATASET_HANDLERS = exports.DATASET_HANDLERS = function () {
  var handlers = {};
  var register = function register(format, handler) {
    (0, _defineProperty2["default"])(handlers, format, handler);
  };
  register(_constants.DATASET_FORMATS.row, processRowObject);
  register(_constants.DATASET_FORMATS.geojson, processGeojson);
  register(_constants.DATASET_FORMATS.csv, processCsvData);
  register(_constants.DATASET_FORMATS.arrow, processArrowTable);
  register(_constants.DATASET_FORMATS.keplergl, processKeplerglDataset);
  return handlers;
}();
// Public bundle of the processor functions defined in this module, together with
// two helpers re-exported from the common utils package.
var Processors = {
  // dataset processors
  processGeojson: processGeojson,
  processCsvData: processCsvData,
  processArrowTable: processArrowTable,
  processArrowBatches: processArrowBatches,
  processRowObject: processRowObject,
  processKeplerglJSON: processKeplerglJSON,
  processKeplerglDataset: processKeplerglDataset,
  // field/type helpers
  analyzerTypeToFieldType: _commonUtils.analyzerTypeToFieldType,
  getFieldsFromData: _commonUtils.getFieldsFromData,
  parseCsvRowsByFieldType: parseCsvRowsByFieldType
};
exports.Processors = Processors;
//# sourceMappingURL=data:application/json;charset=utf-8;base64,