UNPKG

kepler.gl

Version:

kepler.gl is a WebGL-based application for visualizing large-scale location data in the browser

601 lines (570 loc) 65.3 kB
"use strict";

var _interopRequireDefault = require("@babel/runtime/helpers/interopRequireDefault");
var _typeof = require("@babel/runtime/helpers/typeof");
Object.defineProperty(exports, "__esModule", {
  value: true
});
exports.SUPPORTED_DUCKDB_DROP_EXTENSIONS = void 0;
exports.checkIsSelectQuery = checkIsSelectQuery;
exports.constructST_asWKBQuery = constructST_asWKBQuery;
exports.dropTableIfExists = void 0;
exports.getDuckDBColumnTypes = getDuckDBColumnTypes;
exports.getDuckDBColumnTypesMap = getDuckDBColumnTypesMap;
exports.getGeometryColumns = getGeometryColumns;
exports.isGeoArrowLineString = isGeoArrowLineString;
exports.isGeoArrowMultiLineString = isGeoArrowMultiLineString;
exports.isGeoArrowMultiPoint = isGeoArrowMultiPoint;
exports.isGeoArrowMultiPolygon = isGeoArrowMultiPolygon;
exports.isGeoArrowPoint = isGeoArrowPoint;
exports.isGeoArrowPolygon = isGeoArrowPolygon;
exports.removeSQLComments = removeSQLComments;
exports.restoreUnsupportedExtensions = exports.restoreArrowTable = exports.removeUnsupportedExtensions = void 0;
exports.sanitizeDuckDBTableName = sanitizeDuckDBTableName;
exports.setGeoArrowWKBExtension = setGeoArrowWKBExtension;
exports.splitSqlStatements = splitSqlStatements;
exports.tableFromFile = tableFromFile;
var _regenerator = _interopRequireDefault(require("@babel/runtime/regenerator"));
var _asyncToGenerator2 = _interopRequireDefault(require("@babel/runtime/helpers/asyncToGenerator"));
var arrow = _interopRequireWildcard(require("apache-arrow"));
var _type = require("apache-arrow/type");
var _duckdbWasm = require("@duckdb/duckdb-wasm");
var _constants = require("@kepler.gl/constants");
var _init = require("../init");

// Babel interop helper: returns the WeakMap used to cache wildcard-import
// wrappers. On first call it replaces itself with a function that only picks
// between the two caches created here.
function _getRequireWildcardCache(e) {
  if ("function" != typeof WeakMap) return null;
  var r = new WeakMap(),
    t = new WeakMap();
  return (_getRequireWildcardCache = function _getRequireWildcardCache(e) {
    return e ? t : r;
  })(e);
}

// Babel interop helper: wraps a CommonJS module so it can be consumed like an
// ES module namespace object (own properties copied, module itself exposed as
// "default"). Modules already flagged with __esModule are returned untouched.
function _interopRequireWildcard(e, r) {
  if (!r && e && e.__esModule) return e;
  if (null === e || "object" != _typeof(e) && "function" != typeof e) return {
    "default": e
  };
  var t = _getRequireWildcardCache(r);
  if (t && t.has(e)) return t.get(e);
  var n = {
      __proto__: null
    },
    a = Object.defineProperty && Object.getOwnPropertyDescriptor;
  for (var u in e) if ("default" !== u && {}.hasOwnProperty.call(e, u)) {
    var i = a ? Object.getOwnPropertyDescriptor(e, u) : null;
    i && (i.get || i.set) ? Object.defineProperty(n, u, i) : n[u] = e[u];
  }
  return n["default"] = e, t && t.set(e, n), n;
}

// SPDX-License-Identifier: MIT
// Copyright contributors to the kepler.gl project

// loaders.gl
// SPDX-License-Identifier: MIT
// Copyright (c) vis.gl contributors
// Copied from loaders.gl/geoarrow
// TODO: Remove isGeoArrow* once Kepler.gl is upgraded to loaders.gl 4.4+

/**
 * File-name suffixes accepted by tableFromFile.
 * Order matters: tableFromFile picks the first suffix the file name ends with,
 * so 'geojson' has to stay ahead of 'json'.
 */
var SUPPORTED_DUCKDB_DROP_EXTENSIONS = exports.SUPPORTED_DUCKDB_DROP_EXTENSIONS = ['arrow', 'csv', 'geojson', 'json', 'parquet'];

/**
 * Queries a DuckDB table for the schema description.
 * @param connection An active DuckDB connection.
 * @param tableName A name of DuckDB table to query.
 * @returns An array of column names and DuckDB types.
 */
function getDuckDBColumnTypes(_x, _x2) {
  return _getDuckDBColumnTypes.apply(this, arguments);
}

// Compiled async body of getDuckDBColumnTypes (regenerator state machine).
function _getDuckDBColumnTypes() {
  _getDuckDBColumnTypes = (0, _asyncToGenerator2["default"])(/*#__PURE__*/_regenerator["default"].mark(function _callee2(connection, tableName) {
    var resDescribe, duckDbTypes, numRows, nameColumn, typeColumn, i, columnName, columnType;
    return _regenerator["default"].wrap(function _callee2$(_context2) {
      while (1) switch (_context2.prev = _context2.next) {
        case 0:
          _context2.next = 2;
          return connection.query("DESCRIBE \"".concat(tableName, "\";"));
        case 2:
          resDescribe = _context2.sent;
          duckDbTypes = [];
          numRows = resDescribe.numRows;
          // The first two columns of the DESCRIBE result are read as the column
          // name and the column type. Look them up once instead of once per row.
          nameColumn = resDescribe.getChildAt(0);
          typeColumn = resDescribe.getChildAt(1);
          for (i = 0; i < numRows; ++i) {
            columnName = nameColumn === null || nameColumn === void 0 ? void 0 : nameColumn.get(i);
            columnType = typeColumn === null || typeColumn === void 0 ? void 0 : typeColumn.get(i);
            duckDbTypes.push({
              name: columnName,
              type: columnType
            });
          }
          return _context2.abrupt("return", duckDbTypes);
        case 7:
        case "end":
          return _context2.stop();
      }
    }, _callee2);
  }));
  return _getDuckDBColumnTypes.apply(this, arguments);
}

/**
 * Generates a mapping of column names to their corresponding DuckDB data types.
 * @param columns An array of column descriptions from DuckDB. Check getDuckDBColumnTypes.
 * @returns A record where keys are column names and values are their data types.
 */
function getDuckDBColumnTypesMap(columns) {
  return columns.reduce(function (acc, value) {
    acc[value.name] = value.type;
    return acc;
  }, {});
}

/**
 * Constructs an SQL query to select all columns from a given table,
 * converting specified columns to Well-Known Binary (WKB) format using ST_AsWKB.
 * Table and column names are interpolated as-is; callers must pass names that are safe to embed.
 * @param tableName The name of the table from which to select data.
 * @param columnsToConvertToWKB An array of column names that should be converted to WKB format.
 * @returns The constructed SQL query.
 */
function constructST_asWKBQuery(tableName, columnsToConvertToWKB) {
  var hasColumns = columnsToConvertToWKB.length > 0;
  // The EXCLUDE list is parenthesized: in `* EXCLUDE a, b` only `a` is excluded
  // and `b` is parsed as an additional select item, which would return `b`
  // twice (raw and as WKB) when more than one column is converted.
  var exclude = hasColumns ? "EXCLUDE (".concat(columnsToConvertToWKB.join(', '), ")") : '';
  var asWKB = hasColumns ? ", ".concat(columnsToConvertToWKB.map(function (column) {
    return "ST_AsWKB(".concat(column, ") as ").concat(column);
  }).join(', ')) : '';
  return "SELECT * ".concat(exclude, " ").concat(asWKB, " FROM '").concat(tableName, "';");
}

/**
 * Finds the names of columns that have a GEOMETRY type.
 * @param columns An array of column descriptors from a DuckDB table.
 * @returns An array of column names that are of type GEOMETRY.
 */
function getGeometryColumns(columns) {
  return columns.filter(function (column) {
    return column.type === 'GEOMETRY';
  }).map(function (column) {
    return column.name;
  });
}

/**
 * Sets the GeoArrow WKB extension metadata for columns of type GEOMETRY in an Arrow table.
 * The field metadata of the passed table is modified in place.
 * @param table The Apache Arrow table whose schema fields will be modified.
 * @param columns An array of column descriptors from a DuckDB table.
 */
function setGeoArrowWKBExtension(table, columns) {
  table.schema.fields.forEach(function (field) {
    var info = columns.find(function (t) {
      return t.name === field.name;
    });
    if ((info === null || info === void 0 ? void 0 : info.type) === 'GEOMETRY') {
      field.metadata.set(_constants.GEOARROW_METADATA_KEY, _constants.GEOARROW_EXTENSIONS.WKB);
    }
  });
}

/**
 * Creates an arrow table from an array of arrow vectors and fields.
 * @param columns An array of arrow vectors.
 * @param fields An array of fields per arrow vector.
 * @param arrowSchema Optional arrow table schema when available.
 * @returns An arrow table.
 */
var restoreArrowTable = exports.restoreArrowTable = function restoreArrowTable(columns, fields, arrowSchema) {
  var creaOpts = {};
  // columns[index] is the vector that belongs to fields[index].
  fields.forEach(function (field, index) {
    creaOpts[field.name] = columns[index];
  });
  return arrowSchema ? new arrow.Table(arrowSchema, creaOpts) : new arrow.Table(creaOpts);
};

/**
 * DuckDb throws when geoarrow extensions are present in metadata.
 * Removes those entries from the field metadata of the passed table (in place).
 * @param table An arrow table to clear from extensions.
 * @returns A map of removed per field geoarrow extensions.
 */
var removeUnsupportedExtensions = exports.removeUnsupportedExtensions = function removeUnsupportedExtensions(table) {
  var removedMetadata = {};
  table.schema.fields.forEach(function (field) {
    var extension = field.metadata.get(_constants.GEOARROW_METADATA_KEY);
    if (extension !== null && extension !== void 0 && extension.startsWith('geoarrow')) {
      removedMetadata[field.name] = extension;
      field.metadata["delete"](_constants.GEOARROW_METADATA_KEY);
    }
  });
  return removedMetadata;
};

/**
 * Restore removed metadata extensions after a call to removeUnsupportedExtensions.
 * @param table An arrow table to restore geoarrow extensions.
 * @param removedExtensions A map of per field geoarrow extensions to restore.
 */
var restoreUnsupportedExtensions = exports.restoreUnsupportedExtensions = function restoreUnsupportedExtensions(table, removedExtensions) {
  table.schema.fields.forEach(function (field) {
    var extension = removedExtensions[field.name];
    if (extension) {
      field.metadata.set(_constants.GEOARROW_METADATA_KEY, extension);
    }
  });
};

/** Checks whether the given Apache Arrow JS type is a Point data type */
function isGeoArrowPoint(type) {
  if (_type.DataType.isFixedSizeList(type)) {
    // Check list size
    if (![2, 3, 4].includes(type.listSize)) {
      return false;
    }

    // Check child of FixedSizeList is floating type
    if (!_type.DataType.isFloat(type.children[0])) {
      return false;
    }
    return true;
  }
  return false;
}

/** Checks whether the given Apache Arrow JS type is a LineString data type (a List of points) */
function isGeoArrowLineString(type) {
  // Check the outer type is a List
  if (!_type.DataType.isList(type)) {
    return false;
  }

  // Check the child is a point type
  if (!isGeoArrowPoint(type.children[0].type)) {
    return false;
  }
  return true;
}

/** Checks whether the given Apache Arrow JS type is a Polygon data type (a List of linestrings) */
function isGeoArrowPolygon(type) {
  // Check the outer vector is a List
  if (!_type.DataType.isList(type)) {
    return false;
  }

  // Check the child is a linestring vector
  if (!isGeoArrowLineString(type.children[0].type)) {
    return false;
  }
  return true;
}

/**
 * Checks whether the given Apache Arrow JS type is a MultiPoint data type (a List of points).
 * The check is structurally the same as isGeoArrowLineString.
 */
function isGeoArrowMultiPoint(type) {
  // Check the outer vector is a List
  if (!_type.DataType.isList(type)) {
    return false;
  }

  // Check the child is a point vector
  if (!isGeoArrowPoint(type.children[0].type)) {
    return false;
  }
  return true;
}

/**
 * Checks whether the given Apache Arrow JS type is a MultiLineString data type (a List of linestrings).
 * The check is structurally the same as isGeoArrowPolygon.
 */
function isGeoArrowMultiLineString(type) {
  // Check the outer vector is a List
  if (!_type.DataType.isList(type)) {
    return false;
  }

  // Check the child is a linestring vector
  if (!isGeoArrowLineString(type.children[0].type)) {
    return false;
  }
  return true;
}

/** Checks whether the given Apache Arrow JS type is a MultiPolygon data type (a List of polygons) */
function isGeoArrowMultiPolygon(type) {
  // Check the outer vector is a List
  if (!_type.DataType.isList(type)) {
    return false;
  }

  // Check the child is a polygon vector
  if (!isGeoArrowPolygon(type.children[0].type)) {
    return false;
  }
  return true;
}

/**
 * Checks if the given SQL query is a SELECT query by using the EXPLAIN command.
 * Any error raised while running EXPLAIN is reported as `false`.
 * @param connection The DuckDB connection instance.
 * @param query The SQL query to check.
 * @returns Resolves to `true` if the query is a SELECT statement, otherwise `false`.
 */
function checkIsSelectQuery(_x3, _x4) {
  return _checkIsSelectQuery.apply(this, arguments);
}

// Compiled async body of checkIsSelectQuery (regenerator state machine).
function _checkIsSelectQuery() {
  _checkIsSelectQuery = (0, _asyncToGenerator2["default"])(/*#__PURE__*/_regenerator["default"].mark(function _callee3(connection, query) {
    var result;
    return _regenerator["default"].wrap(function _callee3$(_context3) {
      while (1) switch (_context3.prev = _context3.next) {
        case 0:
          _context3.prev = 0;
          _context3.next = 3;
          return connection.query("EXPLAIN (".concat(query, ")"));
        case 3:
          result = _context3.sent;
          return _context3.abrupt("return", result.numRows > 0);
        case 7:
          _context3.prev = 7;
          _context3.t0 = _context3["catch"](0);
          return _context3.abrupt("return", false);
        case 10:
        case "end":
          return _context3.stop();
      }
    }, _callee3, null, [[0, 7]]);
  }));
  return _checkIsSelectQuery.apply(this, arguments);
}

/**
 * Split a string with potentially multiple SQL queries (separated as usual by ';') into an array of queries.
 * This implementation:
 *  - Handles single and double quoted strings with proper escaping
 *  - Ignores semicolons in line comments (--) and block comments (slash asterisk)
 *  - Trims whitespace from queries
 *  - Handles SQL-style escaped quotes ('' inside strings)
 *  - Returns only non-empty queries
 * Comments are kept in the returned statements; only the separators are removed.
 * @param input A string with potentially multiple SQL queries.
 * @returns An array of queries.
 */
function splitSqlStatements(input) {
  var queries = [];
  var currentQuery = '';
  var inSingleQuote = false;
  var inDoubleQuote = false;
  var inLineComment = false;
  var inBlockComment = false;
  for (var i = 0; i < input.length; i++) {
    var _char = input[i];
    if (inLineComment) {
      currentQuery += _char;
      if (_char === '\n') {
        inLineComment = false;
      }
      continue;
    }
    if (inBlockComment) {
      currentQuery += _char;
      if (_char === '*' && input[i + 1] === '/') {
        inBlockComment = false;
        currentQuery += input[++i]; // Consume '/'
      }
      continue;
    }
    if (inSingleQuote) {
      currentQuery += _char;
      if (_char === "'") {
        // Handle escaped single quotes in SQL
        if (i + 1 < input.length && input[i + 1] === "'") {
          currentQuery += input[++i];
        } else {
          inSingleQuote = false;
        }
      }
      continue;
    }
    if (inDoubleQuote) {
      currentQuery += _char;
      if (_char === '"') {
        // Handle escaped double quotes
        if (i + 1 < input.length && input[i + 1] === '"') {
          currentQuery += input[++i];
        } else {
          inDoubleQuote = false;
        }
      }
      continue;
    }

    // Check for comment starts
    if (_char === '-' && input[i + 1] === '-') {
      inLineComment = true;
      currentQuery += _char + input[++i];
      continue;
    }
    if (_char === '/' && input[i + 1] === '*') {
      inBlockComment = true;
      currentQuery += _char + input[++i];
      continue;
    }

    // Check for quote starts
    if (_char === "'") {
      inSingleQuote = true;
      currentQuery += _char;
      continue;
    }
    if (_char === '"') {
      inDoubleQuote = true;
      currentQuery += _char;
      continue;
    }

    // Handle query separator
    if (_char === ';') {
      var _trimmed = currentQuery.trim();
      if (_trimmed.length > 0) {
        queries.push(_trimmed);
      }
      currentQuery = '';
      continue;
    }
    currentQuery += _char;
  }

  // Add the final query
  var trimmed = currentQuery.trim();
  if (trimmed.length > 0) {
    queries.push(trimmed);
  }
  return queries;
}

/**
 * Removes SQL comments from a given SQL string.
 * Line comments (--) are removed up to, but not including, the line break.
 * Block comments (slash asterisk) are removed entirely; an unterminated block comment is left as-is.
 * Comment markers inside single- or double-quoted sections are not comments and are preserved.
 * @param sql The SQL query string from which comments should be removed.
 * @returns The cleaned SQL string without comments.
 */
function removeSQLComments(sql) {
  var length = sql.length;
  var result = '';
  var i = 0;
  while (i < length) {
    var current = sql[i];
    var next = sql[i + 1];

    // Quoted string or identifier: copy verbatim up to the closing quote.
    // A doubled quote character inside the section is an escaped quote.
    if (current === "'" || current === '"') {
      var end = i + 1;
      while (end < length) {
        if (sql[end] === current) {
          if (sql[end + 1] === current) {
            end += 2;
            continue;
          }
          break;
        }
        end++;
      }
      result += sql.slice(i, end + 1);
      i = end + 1;
      continue;
    }

    // Line comment: skip everything up to the line break, which is kept.
    if (current === '-' && next === '-') {
      while (i < length && sql[i] !== '\n' && sql[i] !== '\r') {
        i++;
      }
      continue;
    }

    // Block comment: skip through the closing marker.
    if (current === '/' && next === '*') {
      var close = sql.indexOf('*/', i + 2);
      if (close === -1) {
        // Unterminated comment: keep the remaining text untouched.
        result += sql.slice(i);
        break;
      }
      i = close + 2;
      continue;
    }
    result += current;
    i++;
  }
  return result.trim();
}

/**
 * Drops a table if it exists in the DuckDB database.
 * A failing query is logged with console.error and not rethrown.
 * @param connection The DuckDB connection instance.
 * @param tableName The name of the table to drop.
 * @returns A promise that resolves when the operation is complete.
 */
var dropTableIfExists = exports.dropTableIfExists = /*#__PURE__*/function () {
  var _ref = (0, _asyncToGenerator2["default"])(/*#__PURE__*/_regenerator["default"].mark(function _callee(connection, tableName) {
    return _regenerator["default"].wrap(function _callee$(_context) {
      while (1) switch (_context.prev = _context.next) {
        case 0:
          _context.prev = 0;
          _context.next = 3;
          return connection.query("DROP TABLE IF EXISTS \"".concat(tableName, "\";"));
        case 3:
          _context.next = 8;
          break;
        case 5:
          _context.prev = 5;
          _context.t0 = _context["catch"](0);
          console.error('Dropping table failed', tableName, _context.t0);
        case 8:
        case "end":
          return _context.stop();
      }
    }, _callee, null, [[0, 5]]);
  }));
  return function dropTableIfExists(_x5, _x6) {
    return _ref.apply(this, arguments);
  };
}();

/**
 * Imports a file into DuckDB as a table, supporting multiple formats from SUPPORTED_DUCKDB_DROP_EXTENSIONS.
 * The table name is derived from the file name with sanitizeDuckDBTableName.
 * Failures are returned as Error values rather than thrown.
 * @param file The file to be imported.
 * @returns A promise that resolves to `null` on success or to an Error describing the failure.
 */
function tableFromFile(_x7) {
  return _tableFromFile.apply(this, arguments);
}

// Compiled async body of tableFromFile (regenerator state machine).
// States 12..47 form the try block; state 48 is its catch handler.
function _tableFromFile() {
  _tableFromFile = (0, _asyncToGenerator2["default"])(/*#__PURE__*/_regenerator["default"].mark(function _callee4(file) {
    var fileExt, db, c, error, tableName, sourceName, arrayBuffer, uint8Array, arrowTable, message;
    return _regenerator["default"].wrap(function _callee4$(_context4) {
      while (1) switch (_context4.prev = _context4.next) {
        case 0:
          if (file) {
            _context4.next = 2;
            break;
          }
          return _context4.abrupt("return", new Error('File Drag & Drop: No file'));
        case 2:
          fileExt = SUPPORTED_DUCKDB_DROP_EXTENSIONS.find(function (ext) {
            return file.name.endsWith(ext);
          });
          if (fileExt) {
            _context4.next = 5;
            break;
          }
          return _context4.abrupt("return", new Error("File Drag & Drop: File extension isn't supported"));
        case 5:
          _context4.next = 7;
          return (0, _init.getDuckDB)();
        case 7:
          db = _context4.sent;
          _context4.next = 10;
          return db.connect();
        case 10:
          c = _context4.sent;
          error = null;
          _context4.prev = 12;
          tableName = sanitizeDuckDBTableName(file.name);
          sourceName = 'temp_file_handle';
          // Wait for the spatial extension before importing. A failure here is
          // logged instead of aborting, so formats that do not need the extension
          // can still be imported and the rejection is no longer left unhandled.
          _context4.next = 16;
          return c.query("install spatial;\n load spatial;")["catch"](function (spatialError) {
            console.error('Loading the spatial extension failed', spatialError);
          });
        case 16:
          if (!(fileExt === 'arrow')) {
            _context4.next = 26;
            break;
          }
          _context4.next = 19;
          return file.arrayBuffer();
        case 19:
          arrayBuffer = _context4.sent;
          uint8Array = new Uint8Array(arrayBuffer);
          arrowTable = arrow.tableFromIPC(uint8Array);
          _context4.next = 24;
          return c.insertArrowTable(arrowTable, {
            name: tableName
          });
        case 24:
          _context4.next = 46;
          break;
        case 26:
          // All non-arrow formats are read by DuckDB through a registered file handle.
          _context4.next = 28;
          return db.registerFileHandle(sourceName, file, _duckdbWasm.DuckDBDataProtocol.BROWSER_FILEREADER, true);
        case 28:
          if (!(fileExt === 'csv')) {
            _context4.next = 33;
            break;
          }
          _context4.next = 31;
          return c.query("\n CREATE TABLE '".concat(tableName, "' AS\n SELECT *\n FROM read_csv('").concat(sourceName, "', header = true, auto_detect = true, sample_size = -1);\n "));
        case 31:
          _context4.next = 46;
          break;
        case 33:
          if (!(fileExt === 'json')) {
            _context4.next = 38;
            break;
          }
          _context4.next = 36;
          return c.query("\n CREATE TABLE '".concat(tableName, "' AS\n SELECT *\n FROM read_json_auto('").concat(sourceName, "');\n "));
        case 36:
          _context4.next = 46;
          break;
        case 38:
          if (!(fileExt === 'geojson')) {
            _context4.next = 43;
            break;
          }
          _context4.next = 41;
          return c.query("\n CREATE TABLE '".concat(tableName, "' AS\n SELECT *\n FROM ST_READ('").concat(sourceName, "', keep_wkb = TRUE);\n "));
        case 41:
          _context4.next = 46;
          break;
        case 43:
          if (!(fileExt === 'parquet')) {
            _context4.next = 46;
            break;
          }
          _context4.next = 46;
          return c.query("\n CREATE TABLE '".concat(tableName, "' AS\n SELECT *\n FROM read_parquet('").concat(sourceName, "')\n "));
        case 46:
          _context4.next = 52;
          break;
        case 48:
          _context4.prev = 48;
          _context4.t0 = _context4["catch"](12);
          if (_context4.t0 instanceof Error) {
            message = _context4.t0.message || '';
            // output more readable errors for known issues
            if (message.includes('Arrow Type with extension name: geoarrow')) {
              error = new Error('The GeoArrow extensions are not implemented in the connected DuckDB version.');
            } else if (message.includes("Geoparquet column 'geometry' does not have geometry types")) {
              error = new Error("Invalid Input Error: Geoparquet column 'geometry' does not have geometry types.\nPossible reasons:\n - Old .parquet files that don't match the Parquet format specification.\n - Unsupported compression.");
            }
          }
          if (!error) {
            error = _context4.t0;
          }
        case 52:
          // Reached from both the success path and the catch handler.
          _context4.next = 54;
          return c.close();
        case 54:
          return _context4.abrupt("return", error);
        case 55:
        case "end":
          return _context4.stop();
      }
    }, _callee4, null, [[12, 48]]);
  }));
  return _tableFromFile.apply(this, arguments);
}

/**
 * Sanitizes a file name to be a valid DuckDB table name.
 * Every character outside [a-zA-Z0-9_] (including the dot before the extension) becomes an underscore.
 * @param fileName The input file name to be sanitized.
 * @returns A valid DuckDB table name.
 */
function sanitizeDuckDBTableName(fileName) {
  // Replace invalid characters with underscores
  var name = fileName.replace(/[^a-zA-Z0-9_]/g, '_');
  // Ensure it doesn't start with a digit
  if (/^\d/.test(name)) {
    name = "t_".concat(name);
  }
  return name || 'default_table';
}
//# sourceMappingURL=data:application/json;charset=utf-8;base64,