kepler.gl
Version:
kepler.gl is a WebGL-based application for visualizing large-scale location data in the browser
669 lines (637 loc) • 74.5 kB
JavaScript
"use strict";
var _interopRequireDefault = require("@babel/runtime/helpers/interopRequireDefault");
var _typeof = require("@babel/runtime/helpers/typeof");
Object.defineProperty(exports, "__esModule", {
value: true
});
exports.SUPPORTED_DUCKDB_DROP_EXTENSIONS = void 0;
exports.castDuckDBTypesForKepler = castDuckDBTypesForKepler;
exports.checkIsSelectQuery = checkIsSelectQuery;
exports.dropTableIfExists = void 0;
exports.getDuckDBColumnTypes = getDuckDBColumnTypes;
exports.getDuckDBColumnTypesMap = getDuckDBColumnTypesMap;
exports.isGeoArrowLineString = isGeoArrowLineString;
exports.isGeoArrowMultiLineString = isGeoArrowMultiLineString;
exports.isGeoArrowMultiPoint = isGeoArrowMultiPoint;
exports.isGeoArrowMultiPolygon = isGeoArrowMultiPolygon;
exports.isGeoArrowPoint = isGeoArrowPoint;
exports.isGeoArrowPolygon = isGeoArrowPolygon;
exports.quoteTableName = quoteTableName;
exports.removeSQLComments = removeSQLComments;
exports.restoreUnsupportedExtensions = exports.restoreArrowTable = exports.removeUnsupportedExtensions = void 0;
exports.sanitizeDuckDBTableName = sanitizeDuckDBTableName;
exports.setGeoArrowWKBExtension = setGeoArrowWKBExtension;
exports.splitSqlStatements = splitSqlStatements;
exports.tableFromFile = tableFromFile;
var _regenerator = _interopRequireDefault(require("@babel/runtime/regenerator"));
var _asyncToGenerator2 = _interopRequireDefault(require("@babel/runtime/helpers/asyncToGenerator"));
var arrow = _interopRequireWildcard(require("apache-arrow"));
var _type = require("apache-arrow/type");
var _duckdbWasm = require("@duckdb/duckdb-wasm");
var _constants = require("@kepler.gl/constants");
var _utils = require("@kepler.gl/utils");
// Babel-generated interop helper. Returns null when WeakMap is unavailable. Otherwise, on its first
// call it creates two WeakMap caches and replaces itself with a function that returns one of them
// depending on whether its argument is truthy; _interopRequireWildcard uses the result as its cache.
function _getRequireWildcardCache(e) { if ("function" != typeof WeakMap) return null; var r = new WeakMap(), t = new WeakMap(); return (_getRequireWildcardCache = function _getRequireWildcardCache(e) { return e ? t : r; })(e); }
// Babel-generated interop helper used for `import * as x` style requires.
// - Returns `e` unchanged when `r` is falsy and `e` is flagged as an ES module (`__esModule`).
// - Wraps null and values that are neither objects nor functions as `{ "default": e }`.
// - Otherwise builds a null-prototype namespace object that copies every own enumerable property of `e`
//   except "default" (accessors are copied as accessors), sets its "default" to `e`, and stores the
//   result in the cache from _getRequireWildcardCache when one is available.
function _interopRequireWildcard(e, r) { if (!r && e && e.__esModule) return e; if (null === e || "object" != _typeof(e) && "function" != typeof e) return { "default": e }; var t = _getRequireWildcardCache(r); if (t && t.has(e)) return t.get(e); var n = { __proto__: null }, a = Object.defineProperty && Object.getOwnPropertyDescriptor; for (var u in e) if ("default" !== u && {}.hasOwnProperty.call(e, u)) { var i = a ? Object.getOwnPropertyDescriptor(e, u) : null; i && (i.get || i.set) ? Object.defineProperty(n, u, i) : n[u] = e[u]; } return n["default"] = e, t && t.set(e, n), n; }
// SPDX-License-Identifier: MIT
// Copyright contributors to the kepler.gl project
// loaders.gl
// SPDX-License-Identifier: MIT
// Copyright (c) vis.gl contributors
// Copied from loaders.gl/geoarrow
// TODO: Remove isGeoArrow* once Kepler.gl is upgraded to loaders.gl 4.4+
// File-name suffixes that tableFromFile can import into DuckDB.
// tableFromFile picks the FIRST entry that the file name ends with, so 'geojson' has to stay
// ahead of 'json' (every '*.geojson' name also ends with 'json').
var SUPPORTED_DUCKDB_DROP_EXTENSIONS = exports.SUPPORTED_DUCKDB_DROP_EXTENSIONS = ['arrow', 'csv', 'geojson', 'json', 'parquet'];
/**
* Queries a DuckDB table for the schema description.
* Runs `PRAGMA table_info(<table>)` first; if that query fails, falls back to `DESCRIBE <table>`.
* The table name is passed through quoteTableName before it is placed into either statement.
* @param connection An active DuckDB connection.
* @param tableName A name of DuckDB table to query.
* @returns A promise resolving to an array of `{name, type}` objects, one per column.
* Rejects with an Error when both queries fail; its `cause` is `{primaryError, fallbackError}`.
*/
function getDuckDBColumnTypes(_x, _x2) {
// Public entry point: forwards `this` and all arguments to the regenerator-based implementation.
return _getDuckDBColumnTypes.apply(this, arguments);
}
/**
* Generates a mapping of column names to their corresponding DuckDB data types.
* @param columns An array of column descriptions from DuckDB. Check getDuckDBColumnTypes.
* @returns A record where keys are column names and values are their data types.
*/
// Implementation of getDuckDBColumnTypes (transpiled async function).
// On first call this function replaces itself with the async-to-generator wrapped state machine
// below and then invokes it, so the wrapper is only built once.
function _getDuckDBColumnTypes() {
_getDuckDBColumnTypes = (0, _asyncToGenerator2["default"])( /*#__PURE__*/_regenerator["default"].mark(function _callee2(connection, tableName) {
var quotedTableName, duckDbTypes, resInfo, numRows, columnNames, columnTypes, i, resDescribe, _numRows, _i, _resDescribe$getChild, _resDescribe$getChild2, columnName, columnType, error;
return _regenerator["default"].wrap(function _callee2$(_context2) {
while (1) switch (_context2.prev = _context2.next) {
case 0:
quotedTableName = quoteTableName(tableName);
duckDbTypes = [];
// Start of the outer try region (see the [2, 12] entry in the table at the bottom).
_context2.prev = 2;
_context2.next = 5;
// Primary strategy: PRAGMA table_info, read by the 'name' and 'type' result columns.
return connection.query("PRAGMA table_info(".concat(quotedTableName, ")"));
case 5:
resInfo = _context2.sent;
numRows = resInfo.numRows;
columnNames = resInfo.getChild('name');
columnTypes = resInfo.getChild('type');
for (i = 0; i < numRows; ++i) {
// A missing 'name' or 'type' child yields undefined for that property instead of throwing.
duckDbTypes.push({
name: columnNames === null || columnNames === void 0 ? void 0 : columnNames.get(i),
type: columnTypes === null || columnTypes === void 0 ? void 0 : columnTypes.get(i)
});
}
_context2.next = 27;
break;
case 12:
// Outer catch: the PRAGMA path failed; keep its error in t0 and try DESCRIBE instead.
_context2.prev = 12;
_context2.t0 = _context2["catch"](2);
// Start of the inner try region (the [14, 22] entry).
_context2.prev = 14;
_context2.next = 17;
return connection.query("DESCRIBE ".concat(quotedTableName));
case 17:
resDescribe = _context2.sent;
_numRows = resDescribe.numRows;
for (_i = 0; _i < _numRows; ++_i) {
// DESCRIBE results are read positionally: child 0 is used as the name, child 1 as the type.
columnName = (_resDescribe$getChild = resDescribe.getChildAt(0)) === null || _resDescribe$getChild === void 0 ? void 0 : _resDescribe$getChild.get(_i);
columnType = (_resDescribe$getChild2 = resDescribe.getChildAt(1)) === null || _resDescribe$getChild2 === void 0 ? void 0 : _resDescribe$getChild2.get(_i);
duckDbTypes.push({
name: columnName,
type: columnType
});
}
_context2.next = 27;
break;
case 22:
// Inner catch: both strategies failed; surface one Error that carries both underlying errors.
_context2.prev = 22;
_context2.t1 = _context2["catch"](14);
error = new Error("[DuckDB] Failed to load column types for ".concat(tableName, " (PRAGMA + DESCRIBE)."));
error.cause = {
primaryError: _context2.t0,
fallbackError: _context2.t1
};
throw error;
case 27:
return _context2.abrupt("return", duckDbTypes);
case 28:
case "end":
return _context2.stop();
}
// Try-region table: [try start, catch target] pairs for the two regions above.
}, _callee2, null, [[2, 12], [14, 22]]);
}));
return _getDuckDBColumnTypes.apply(this, arguments);
}
/**
 * Builds a lookup object from column name to DuckDB type.
 * When several entries share a name, the last one wins.
 * @param columns Column descriptions (`{name, type}`), e.g. the result of getDuckDBColumnTypes.
 * @returns A plain object keyed by column name whose values are the column types.
 */
function getDuckDBColumnTypesMap(columns) {
  var typeByName = {};
  columns.forEach(function (column) {
    typeByName[column.name] = column.type;
  });
  return typeByName;
}
/**
 * Quotes a table name for safe SQL usage.
 * Names are returned untouched in two cases:
 *  1. the name is already a quoted identifier (at least two characters, starting and ending with `"`);
 *  2. the name contains both a dot and a double quote, which is assumed to be a caller-quoted
 *     qualified name such as `schema."my table"`.
 * Every other name is wrapped in double quotes with embedded double quotes doubled.
 * @param tableName The table name to quote.
 * @returns The table name, properly quoted.
 */
function quoteTableName(tableName) {
  // A lone `"` both starts and ends with a quote but is not a quoted identifier;
  // the length guard sends it down the escaping path instead of returning broken SQL.
  var isQuotedIdentifier = tableName.length > 1 && tableName.startsWith('"') && tableName.endsWith('"');
  var isQuotedQualifiedName = tableName.includes('.') && tableName.includes('"');
  if (isQuotedIdentifier || isQuotedQualifiedName) {
    return tableName;
  }
  return '"' + tableName.replace(/"/g, '""') + '"';
}
/**
 * Wraps a column name in double quotes so it can be used as an SQL identifier.
 * The name is always quoted; any double quote inside it is doubled.
 * @param columnName The column name to quote.
 * @returns The quoted column name.
 */
function quoteColumnName(columnName) {
  var escapedName = columnName.replace(/"/g, '""');
  return '"' + escapedName + '"';
}
/**
 * Constructs an SQL query to select all columns from a given table,
 * converting GEOMETRY columns to Well-Known Binary (WKB) with ST_AsWKB,
 * and casting BIGINT, UBIGINT, HUGEINT, UHUGEINT and DECIMAL columns to DOUBLE.
 * Columns whose type is missing or not a string are selected unchanged.
 * @param tableName The name of the table from which to select data (quoted via quoteTableName).
 * @param columns An array of column descriptors, each with a type and name.
 * @param options Optional `{geometryToWKB, bigIntToDouble}` flags. Both default to true only when
 * the argument is omitted; a partially filled object is used as given, so an omitted flag is falsy.
 * @returns The constructed SQL query.
 */
function castDuckDBTypesForKepler(tableName, columns) {
  var options = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : {
    geometryToWKB: true,
    bigIntToDouble: true
  };
  // Integer types of 64 bits and wider; they are cast to DOUBLE to avoid BigInt values in JS.
  var wideIntegerTypes = ['BIGINT', 'UBIGINT', 'HUGEINT', 'UHUGEINT'];
  var selectList = columns.map(function (column) {
    var type = column.type;
    var quotedColumnName = quoteColumnName(column.name);
    if (type === 'GEOMETRY' && options.geometryToWKB) {
      return "ST_AsWKB(".concat(quotedColumnName, ") as ").concat(quotedColumnName);
    }
    // The typeof guard keeps a column without a type from throwing on `startsWith`.
    var needsDoubleCast = typeof type === 'string' && (wideIntegerTypes.indexOf(type) !== -1 || type.startsWith('DECIMAL'));
    if (options.bigIntToDouble && needsDoubleCast) {
      return "CAST(".concat(quotedColumnName, " AS DOUBLE) as ").concat(quotedColumnName);
    }
    return quotedColumnName;
  });
  var quotedTableName = quoteTableName(tableName);
  return "SELECT ".concat(selectList.join(', '), " FROM ").concat(quotedTableName);
}
/**
 * Marks GEOMETRY columns of an Arrow table as GeoArrow WKB.
 * For every schema field, the first column descriptor with the same name is looked up; when its
 * type is 'GEOMETRY', the field metadata entry GEOARROW_METADATA_KEY is set to the WKB extension.
 * The table's field metadata is modified in place.
 * @param table The Apache Arrow table whose schema fields will be modified.
 * @param columns An array of column descriptors from a DuckDB table.
 */
function setGeoArrowWKBExtension(table, columns) {
  var schemaFields = table.schema.fields;
  var _loop = function _loop(position) {
    var field = schemaFields[position];
    var descriptor = columns.find(function (candidate) {
      return candidate.name === field.name;
    });
    var isGeometry = descriptor !== null && descriptor !== undefined && descriptor.type === 'GEOMETRY';
    if (isGeometry) {
      field.metadata.set(_constants.GEOARROW_METADATA_KEY, _constants.GEOARROW_EXTENSIONS.WKB);
    }
  };
  for (var position = 0; position < schemaFields.length; position++) {
    _loop(position);
  }
}
/**
 * Assembles an arrow table from parallel arrays of vectors and fields.
 * The vector at index `i` becomes the column named after `fields[i].name`.
 * @param columns An array of arrow vectors.
 * @param fields An array of fields, one per arrow vector.
 * @param arrowSchema Optional arrow table schema; when truthy it is passed to the Table constructor
 * ahead of the columns.
 * @returns An arrow table.
 */
var restoreArrowTable = exports.restoreArrowTable = function restoreArrowTable(columns, fields, arrowSchema) {
  var vectorsByName = {};
  fields.forEach(function (field, position) {
    vectorsByName[field.name] = columns[position];
  });
  if (arrowSchema) {
    return new arrow.Table(arrowSchema, vectorsByName);
  }
  return new arrow.Table(vectorsByName);
};
/**
 * Strips geoarrow extension metadata from an arrow table's fields (DuckDb throws when geoarrow
 * extensions are present in metadata). Only GEOARROW_METADATA_KEY entries whose value starts with
 * 'geoarrow' are deleted; the table's field metadata is modified in place.
 * @param table An arrow table to clear from extensions.
 * @returns A map from field name to the extension value that was removed from that field.
 */
var removeUnsupportedExtensions = exports.removeUnsupportedExtensions = function removeUnsupportedExtensions(table) {
  var strippedByField = {};
  var schemaFields = table.schema.fields;
  for (var position = 0; position < schemaFields.length; position++) {
    var field = schemaFields[position];
    var value = field.metadata.get(_constants.GEOARROW_METADATA_KEY);
    if (value === null || value === undefined || !value.startsWith('geoarrow')) {
      continue;
    }
    strippedByField[field.name] = value;
    field.metadata["delete"](_constants.GEOARROW_METADATA_KEY);
  }
  return strippedByField;
};
/**
 * Puts back the metadata extensions taken out by removeUnsupportedExtensions.
 * Fields without a (truthy) entry in `removedExtensions` are left untouched.
 * @param table An arrow table to restore geoarrow extensions on; modified in place.
 * @param removedExtensions A map of per field geoarrow extensions to restore.
 */
var restoreUnsupportedExtensions = exports.restoreUnsupportedExtensions = function restoreUnsupportedExtensions(table, removedExtensions) {
  var schemaFields = table.schema.fields;
  for (var position = 0; position < schemaFields.length; position++) {
    var field = schemaFields[position];
    var saved = removedExtensions[field.name];
    if (!saved) {
      continue;
    }
    field.metadata.set(_constants.GEOARROW_METADATA_KEY, saved);
  }
};
/**
 * Checks whether the given Apache Arrow JS type is a Point data type:
 * a FixedSizeList of size 2, 3 or 4 whose first child passes DataType.isFloat.
 */
function isGeoArrowPoint(type) {
  if (!_type.DataType.isFixedSizeList(type)) {
    return false;
  }
  // Only 2, 3 or 4 coordinates per point are accepted.
  var dimension = type.listSize;
  if (dimension !== 2 && dimension !== 3 && dimension !== 4) {
    return false;
  }
  // The coordinate child has to be a floating point type.
  return _type.DataType.isFloat(type.children[0]) ? true : false;
}
/**
 * Checks whether the given Apache Arrow JS type is a LineString data type:
 * a List whose first child has a Point type.
 */
function isGeoArrowLineString(type) {
  var outerIsList = _type.DataType.isList(type);
  if (outerIsList) {
    return isGeoArrowPoint(type.children[0].type) ? true : false;
  }
  return false;
}
/**
 * Checks whether the given Apache Arrow JS type is a Polygon data type:
 * a List whose first child has a LineString type.
 */
function isGeoArrowPolygon(type) {
  var outerIsList = _type.DataType.isList(type);
  if (outerIsList) {
    return isGeoArrowLineString(type.children[0].type) ? true : false;
  }
  return false;
}
/**
 * Checks whether the given Apache Arrow JS type is a MultiPoint data type:
 * a List whose first child has a Point type.
 * This is the same structural test that isGeoArrowLineString performs.
 */
function isGeoArrowMultiPoint(type) {
  var outerIsList = _type.DataType.isList(type);
  if (outerIsList) {
    return isGeoArrowPoint(type.children[0].type) ? true : false;
  }
  return false;
}
/**
 * Checks whether the given Apache Arrow JS type is a MultiLineString data type:
 * a List whose first child has a LineString type.
 * This is the same structural test that isGeoArrowPolygon performs.
 */
function isGeoArrowMultiLineString(type) {
  var outerIsList = _type.DataType.isList(type);
  if (outerIsList) {
    return isGeoArrowLineString(type.children[0].type) ? true : false;
  }
  return false;
}
/**
 * Checks whether the given Apache Arrow JS type is a MultiPolygon data type:
 * a List whose first child has a Polygon type.
 */
function isGeoArrowMultiPolygon(type) {
  var outerIsList = _type.DataType.isList(type);
  if (outerIsList) {
    return isGeoArrowPolygon(type.children[0].type) ? true : false;
  }
  return false;
}
/**
* Checks if the given SQL query is a SELECT query by using the EXPLAIN command.
* The query text is placed verbatim inside `EXPLAIN (...)` and sent to the connection.
* @param connection The DuckDB connection instance.
* @param query The SQL query to check.
* @returns Resolves to `true` if the EXPLAIN query succeeds and returns at least one row,
* otherwise `false`. Any error raised by the EXPLAIN query is swallowed and reported as `false`.
*/
function checkIsSelectQuery(_x3, _x4) {
// Public entry point: forwards `this` and all arguments to the regenerator-based implementation.
return _checkIsSelectQuery.apply(this, arguments);
}
/**
* Split a string with potentially multiple SQL queries (separated as usual by ';') into an array of queries.
* This implementation:
* - Handles single and double quoted strings with proper escaping
* - Ignores semicolons in line comments (--) and block comments (slash asterisk)
* - Trims whitespace from queries
* - Handles SQL-style escaped quotes ('' inside strings)
* - Returns only non-empty queries
* @param input A string with potentially multiple SQL queries.
* @returns An array of queries.
*/
// Implementation of checkIsSelectQuery (transpiled async function).
// On first call this function replaces itself with the async-to-generator wrapped state machine
// below and then invokes it.
function _checkIsSelectQuery() {
_checkIsSelectQuery = (0, _asyncToGenerator2["default"])( /*#__PURE__*/_regenerator["default"].mark(function _callee3(connection, query) {
var result;
return _regenerator["default"].wrap(function _callee3$(_context3) {
while (1) switch (_context3.prev = _context3.next) {
case 0:
// Start of the try region (the [0, 7] entry in the table at the bottom).
_context3.prev = 0;
_context3.next = 3;
// The query text is concatenated as-is; it is not escaped or validated here.
return connection.query("EXPLAIN (".concat(query, ")"));
case 3:
result = _context3.sent;
return _context3.abrupt("return", result.numRows > 0);
case 7:
// Catch: any failure of the EXPLAIN query is reported as "not a SELECT".
_context3.prev = 7;
_context3.t0 = _context3["catch"](0);
return _context3.abrupt("return", false);
case 10:
case "end":
return _context3.stop();
}
}, _callee3, null, [[0, 7]]);
}));
return _checkIsSelectQuery.apply(this, arguments);
}
/**
 * Splits a string that may hold several ';'-separated SQL statements into an array of statements.
 * - Semicolons inside single-quoted or double-quoted text do not split ('' and "" are treated as
 *   escaped quotes).
 * - Semicolons inside line comments (--) and block comments do not split; comment text is kept
 *   as part of the statement it appears in.
 * - Each statement is trimmed and empty statements are dropped.
 * @param input A string with potentially multiple SQL queries.
 * @returns An array of queries.
 */
function splitSqlStatements(input) {
  var statements = [];
  var buffer = '';
  // One of: 'sql', 'lineComment', 'blockComment', 'singleQuote', 'doubleQuote'.
  var mode = 'sql';
  // Stores the trimmed buffer as a statement (when non-empty) and starts a new one.
  var flush = function flush() {
    var statement = buffer.trim();
    if (statement.length > 0) {
      statements.push(statement);
    }
    buffer = '';
  };
  var pos = 0;
  while (pos < input.length) {
    var ch = input[pos];
    var next = input[pos + 1];
    pos += 1;
    switch (mode) {
      case 'lineComment':
        buffer += ch;
        if (ch === '\n') {
          mode = 'sql';
        }
        break;
      case 'blockComment':
        buffer += ch;
        if (ch === '*' && next === '/') {
          // Consume the closing '/' together with the '*'.
          buffer += next;
          pos += 1;
          mode = 'sql';
        }
        break;
      case 'singleQuote':
      case 'doubleQuote':
        {
          var quote = mode === 'singleQuote' ? "'" : '"';
          buffer += ch;
          if (ch === quote) {
            if (next === quote) {
              // A doubled quote is an escaped quote: keep both and stay inside the literal.
              buffer += next;
              pos += 1;
            } else {
              mode = 'sql';
            }
          }
          break;
        }
      default:
        if (ch === '-' && next === '-') {
          mode = 'lineComment';
          buffer += ch + next;
          pos += 1;
        } else if (ch === '/' && next === '*') {
          mode = 'blockComment';
          buffer += ch + next;
          pos += 1;
        } else if (ch === "'") {
          mode = 'singleQuote';
          buffer += ch;
        } else if (ch === '"') {
          mode = 'doubleQuote';
          buffer += ch;
        } else if (ch === ';') {
          flush();
        } else {
          buffer += ch;
        }
    }
  }
  // Whatever follows the last separator is the final statement.
  flush();
  return statements;
}
/**
 * Removes SQL comments from a given SQL string.
 * Block comments and line comments (`--` up to, but not including, the line terminator) are
 * dropped. Comment markers inside single-quoted or double-quoted text are left alone, so string
 * literals such as '--x' and quoted identifiers survive intact. A doubled quote inside quoted text
 * is treated as an escaped quote. An unterminated block comment opener is kept as-is.
 * @param sql The SQL query string from which comments should be removed.
 * @returns The cleaned SQL string without comments, trimmed.
 */
function removeSQLComments(sql) {
  var output = '';
  // The quote character of the literal/identifier being scanned, or '' when outside of one.
  var activeQuote = '';
  var pos = 0;
  while (pos < sql.length) {
    var ch = sql[pos];
    var next = sql[pos + 1];
    if (activeQuote !== '') {
      output += ch;
      pos += 1;
      if (ch === activeQuote) {
        if (next === activeQuote) {
          // Escaped quote: copy the second quote and stay inside the quoted text.
          output += next;
          pos += 1;
        } else {
          activeQuote = '';
        }
      }
      continue;
    }
    if (ch === "'" || ch === '"') {
      activeQuote = ch;
      output += ch;
      pos += 1;
      continue;
    }
    if (ch === '-' && next === '-') {
      // Skip to the end of the line but keep the line terminator itself.
      pos += 2;
      while (pos < sql.length && sql[pos] !== '\n' && sql[pos] !== '\r' && sql[pos] !== '\u2028' && sql[pos] !== '\u2029') {
        pos += 1;
      }
      continue;
    }
    if (ch === '/' && next === '*') {
      var closeAt = sql.indexOf('*/', pos + 2);
      if (closeAt !== -1) {
        pos = closeAt + 2;
        continue;
      }
      // No closing marker: this is not a complete comment, so keep the text.
      output += ch + next;
      pos += 2;
      continue;
    }
    output += ch;
    pos += 1;
  }
  return output.trim();
}
/**
 * Drops a table if it exists in the DuckDB database.
 * The name is always wrapped in double quotes as a single identifier; double quotes inside the
 * name are doubled so that they cannot terminate the identifier.
 * This function does not throw on query failure: the error is logged with console.error and the
 * returned promise still resolves.
 * @param connection The DuckDB connection instance.
 * @param tableName The (unquoted) name of the table to drop.
 * @returns A promise that resolves when the operation is complete.
 */
var dropTableIfExists = exports.dropTableIfExists = /*#__PURE__*/function () {
  var _ref = (0, _asyncToGenerator2["default"])( /*#__PURE__*/_regenerator["default"].mark(function _callee(connection, tableName) {
    return _regenerator["default"].wrap(function _callee$(_context) {
      while (1) switch (_context.prev = _context.next) {
        case 0:
          // Start of the try region (the [0, 5] entry in the table at the bottom).
          _context.prev = 0;
          _context.next = 3;
          // String() keeps the coercion that concat applied to non-string names.
          return connection.query("DROP TABLE IF EXISTS \"".concat(String(tableName).replace(/"/g, '""'), "\";"));
        case 3:
          _context.next = 8;
          break;
        case 5:
          // Catch: dropping is best-effort, so log and carry on instead of rejecting.
          _context.prev = 5;
          _context.t0 = _context["catch"](0);
          console.error('Dropping table failed', tableName, _context.t0);
        case 8:
        case "end":
          return _context.stop();
      }
    }, _callee, null, [[0, 5]]);
  }));
  return function dropTableIfExists(_x5, _x6) {
    return _ref.apply(this, arguments);
  };
}();
/**
* Imports a file into DuckDB as a table, supporting multiple formats from SUPPORTED_DUCKDB_DROP_EXTENSIONS.
* The table is named after the file via sanitizeDuckDBTableName.
* Validation and import problems are RETURNED rather than thrown: the promise resolves to an Error
* when the file is missing, its extension is unsupported, no database is configured, or the import
* fails, and to `null` on success.
* @param file The file to be imported.
* @returns A promise that resolves to `null` when the file has been processed into a DuckDB table,
* or to an Error describing why it was not.
*/
function tableFromFile(_x7) {
// Public entry point: forwards `this` and all arguments to the regenerator-based implementation.
return _tableFromFile.apply(this, arguments);
}
/**
* Sanitizes a file name to be a valid DuckDB table name.
* @param fileName The input file name to be sanitized.
* @returns A valid DuckDB table name.
*/
// Implementation of tableFromFile (transpiled async function).
// On first call this function replaces itself with the async-to-generator wrapped state machine
// below and then invokes it.
function _tableFromFile() {
_tableFromFile = (0, _asyncToGenerator2["default"])( /*#__PURE__*/_regenerator["default"].mark(function _callee4(file) {
var fileExt, db, c, error, tableName, sourceName, arrayBuffer, uint8Array, arrowTable, message;
return _regenerator["default"].wrap(function _callee4$(_context4) {
while (1) switch (_context4.prev = _context4.next) {
case 0:
if (file) {
_context4.next = 2;
break;
}
// Errors in this function are returned to the caller as values, not thrown.
return _context4.abrupt("return", new Error('File Drag & Drop: No file'));
case 2:
// First supported suffix that the file name ends with.
// NOTE(review): the match is case-sensitive and does not require a '.' before the suffix — confirm intended.
fileExt = SUPPORTED_DUCKDB_DROP_EXTENSIONS.find(function (ext) {
return file.name.endsWith(ext);
});
if (fileExt) {
_context4.next = 5;
break;
}
return _context4.abrupt("return", new Error("File Drag & Drop: File extension isn't supported"));
case 5:
_context4.next = 7;
// The database is awaited from the application config.
return (0, _utils.getApplicationConfig)().database;
case 7:
db = _context4.sent;
if (db) {
_context4.next = 10;
break;
}
return _context4.abrupt("return", new Error('The database is not configured properly.'));
case 10:
_context4.next = 12;
// This await sits outside the try region below, so a failing connect() rejects the promise.
return db.connect();
case 12:
c = _context4.sent;
error = null;
// Start of the try region (the [14, 50] entry in the table at the bottom).
_context4.prev = 14;
tableName = sanitizeDuckDBTableName(file.name);
sourceName = 'temp_file_handle';
// NOTE(review): this query's promise is not awaited or otherwise handled here, so a failure
// would not reach the catch below — confirm that fire-and-forget is intended.
c.query("install spatial;\n load spatial;");
if (!(fileExt === 'arrow')) {
_context4.next = 28;
break;
}
// Arrow files: read the bytes, parse them as Arrow IPC and insert the table directly.
_context4.next = 21;
return file.arrayBuffer();
case 21:
arrayBuffer = _context4.sent;
uint8Array = new Uint8Array(arrayBuffer);
arrowTable = arrow.tableFromIPC(uint8Array);
_context4.next = 26;
return c.insertArrowTable(arrowTable, {
name: tableName
});
case 26:
_context4.next = 48;
break;
case 28:
// All other formats: register the File under the fixed name 'temp_file_handle' and let a
// format-specific reader create the table from it.
// NOTE(review): the handle is not unregistered anywhere in this function — confirm whether that is needed.
_context4.next = 30;
return db.registerFileHandle(sourceName, file, _duckdbWasm.DuckDBDataProtocol.BROWSER_FILEREADER, true);
case 30:
if (!(fileExt === 'csv')) {
_context4.next = 35;
break;
}
_context4.next = 33;
return c.query("\n CREATE TABLE '".concat(tableName, "' AS\n SELECT *\n FROM read_csv('").concat(sourceName, "', header = true, auto_detect = true, sample_size = -1);\n "));
case 33:
_context4.next = 48;
break;
case 35:
if (!(fileExt === 'json')) {
_context4.next = 40;
break;
}
_context4.next = 38;
return c.query("\n CREATE TABLE '".concat(tableName, "' AS\n SELECT *\n FROM read_json_auto('").concat(sourceName, "');\n "));
case 38:
_context4.next = 48;
break;
case 40:
if (!(fileExt === 'geojson')) {
_context4.next = 45;
break;
}
_context4.next = 43;
return c.query("\n CREATE TABLE '".concat(tableName, "' AS\n SELECT *\n FROM ST_READ('").concat(sourceName, "', keep_wkb = TRUE);\n "));
case 43:
_context4.next = 48;
break;
case 45:
if (!(fileExt === 'parquet')) {
_context4.next = 48;
break;
}
_context4.next = 48;
return c.query("\n CREATE TABLE '".concat(tableName, "' AS\n SELECT *\n FROM read_parquet('").concat(sourceName, "')\n "));
case 48:
_context4.next = 54;
break;
case 50:
// Catch: translate two known DuckDB messages into friendlier errors; any other caught
// value is returned unchanged.
_context4.prev = 50;
_context4.t0 = _context4["catch"](14);
if (_context4.t0 instanceof Error) {
message = _context4.t0.message || ''; // output more readable errors for known issues
if (message.includes('Arrow Type with extension name: geoarrow')) {
error = new Error('The GeoArrow extensions are not implemented in the connected DuckDB version.');
} else if (message.includes("Geoparquet column 'geometry' does not have geometry types")) {
error = new Error("Invalid Input Error: Geoparquet column 'geometry' does not have geometry types.\nPossible reasons:\n - Old .parquet files that don't match the Parquet format specification.\n - Unsupported compression.");
}
}
if (!error) {
error = _context4.t0;
}
case 54:
// Reached on both the success and the failure path: close the connection, then report.
_context4.next = 56;
return c.close();
case 56:
// `error` is still null when the import succeeded.
return _context4.abrupt("return", error);
case 57:
case "end":
return _context4.stop();
}
}, _callee4, null, [[14, 50]]);
}));
return _tableFromFile.apply(this, arguments);
}
/**
 * Turns a file name into a string usable as a DuckDB table name.
 * Every character other than ASCII letters, digits and '_' becomes '_' (so the extension's dot is
 * replaced too), a leading digit is prefixed with 't_', and an empty result falls back to
 * 'default_table'.
 * @param fileName The input file name to be sanitized.
 * @returns A valid DuckDB table name.
 */
function sanitizeDuckDBTableName(fileName) {
  var cleaned = fileName.replace(/[^a-zA-Z0-9_]/g, '_');
  var startsWithDigit = /^[0-9]/.test(cleaned);
  var candidate = startsWithDigit ? 't_' + cleaned : cleaned;
  if (candidate) {
    return candidate;
  }
  return 'default_table';
}
//# sourceMappingURL=data:application/json;charset=utf-8;base64,{"version":3,"names":["arrow","_interopRequireWildcard","require","_type","_duckdbWasm","_constants","_utils","_getRequireWildcardCache","e","WeakMap","r","t","__esModule","_typeof","has","get","n","__proto__","a","Object","defineProperty","getOwnPropertyDescriptor","u","hasOwnProperty","call","i","set","SUPPORTED_DUCKDB_DROP_EXTENSIONS","exports","getDuckDBColumnTypes","_x","_x2","_getDuckDBColumnTypes","apply","arguments","_asyncToGenerator2","_regenerator","mark","_callee2","connection","tableName","quotedTableName","duckDbTypes","resInfo","numRows","columnNames","columnTypes","resDescribe","_numRows","_i","_resDescribe$getChild","_resDescribe$getChild2","columnName","columnType","error","wrap","_callee2$","_context2","prev","next","quoteTableName","query","concat","sent","getChild","push","name","type","t0","getChildAt","t1","Error","cause","primaryError","fallbackError","abrupt","stop","getDuckDBColumnTypesMap","columns","reduce","acc","value","startsWith","endsWith","includes","replace","quoteColumnName","castDuckDBTypesForKepler","options","length","undefined","geometryToWKB","bigIntToDouble","modifiedColumns","map","column","quotedColumnName","join","setGeoArrowWKBExtension","table","schema","fields","forEach","field","info","find","metadata","GEOARROW_METADATA_KEY","GEOARROW_EXTENSIONS","WKB","restoreArrowTable","arrowSchema","creaOpts","index","Table","removeUnsupportedExtensions","removedMetadata","extension","restoreUnsupportedExtensions","removedExtensions","isGeoArrowPoint","DataType","isFixedSizeList","listSize","isFloat","children","isGeoArrowLineString","isList","isGeoArrowPolygon","isGeoArrowMultiPoint","isGeoArrowMultiLineString","isGeoArrowMultiPolygon","checkIsSelectQuery","_x3","_x4","_checkIsSelectQuery","_callee3","result","_callee3$","_context3","splitSqlStatements","input","queries","currentQuery","inSingleQuote","inDoubleQuote","inLineComment","inBlockComment","char","trimmed"
,"trim","removeSQLComments","sql","dropTableIfExists","_ref","_callee","_callee$","_context","console","_x5","_x6","tableFromFile","_x7","_tableFromFile","_callee4","file","fileExt","db","c","sourceName","arrayBuffer","uint8Array","arrowTable","message","_callee4$","_context4","ext","getApplicationConfig","database","connect","sanitizeDuckDBTableName","Uint8Array","tableFromIPC","insertArrowTable","registerFileHandle","DuckDBDataProtocol","BROWSER_FILEREADER","close","fileName","test"],"sources":["../../src/table/duckdb-table-utils.ts"],"sourcesContent":["// SPDX-License-Identifier: MIT\n// Copyright contributors to the kepler.gl project\n\n// loaders.gl\n// SPDX-License-Identifier: MIT\n// Copyright (c) vis.gl contributors\n\n// Copied from loaders.gl/geoarrow\n\n// TODO: Remove isGeoArrow* once Kepler.gl is upgraded to loaders.gl 4.4+\n\nimport * as arrow from 'apache-arrow';\nimport {DataType} from 'apache-arrow/type';\nimport {DuckDBDataProtocol} from '@duckdb/duckdb-wasm';\n\nimport {GEOARROW_EXTENSIONS, GEOARROW_METADATA_KEY} from '@kepler.gl/constants';\nimport {ProtoDatasetField} from '@kepler.gl/types';\nimport {DatabaseConnection, getApplicationConfig} from '@kepler.gl/utils';\n\nexport const SUPPORTED_DUCKDB_DROP_EXTENSIONS = ['arrow', 'csv', 'geojson', 'json', 'parquet'];\n\nexport type DuckDBColumnDesc = {name: string; type: string};\n\n/**\n * Queries a DuckDB table for the schema description.\n * @param connection An active DuckDB connection.\n * @param tableName A name of DuckDB table to query.\n * @returns An array of column names and DuckDB types.\n */\nexport async function getDuckDBColumnTypes(\n  connection: DatabaseConnection,\n  tableName: string\n): Promise<DuckDBColumnDesc[]> {\n  const quotedTableName = quoteTableName(tableName);\n  const duckDbTypes: DuckDBColumnDesc[] = [];\n  try {\n    // PRAGMA table_info is less likely to bind/execute view SQL than DESCRIBE,\n    // so it avoids triggering remote access (e.g., S3) for view-backed 
schemas.\n    const resInfo = await connection.query(`PRAGMA table_info(${quotedTableName})`);\n    const numRows = resInfo.numRows;\n    const columnNames = resInfo.getChild('name');\n    const columnTypes = resInfo.getChild('type');\n    for (let i = 0; i < numRows; ++i) {\n      duckDbTypes.push({\n        name: columnNames?.get(i),\n        type: columnTypes?.get(i)\n      });\n    }\n  } catch (primaryError) {\n    try {\n      const resDescribe = await connection.query(`DESCRIBE ${quotedTableName}`);\n      const numRows = resDescribe.numRows;\n      for (let i = 0; i < numRows; ++i) {\n        const columnName = resDescribe.getChildAt(0)?.get(i);\n        const columnType = resDescribe.getChildAt(1)?.get(i);\n\n        duckDbTypes.push({\n          name: columnName,\n          type: columnType\n        });\n      }\n    } catch (fallbackError) {\n      const error = new Error(\n        `[DuckDB] Failed to load column types for ${tableName} (PRAGMA + DESCRIBE).`\n      );\n      (error as Error & {cause?: unknown}).cause = {\n        primaryError,\n        fallbackError\n      };\n      throw error;\n    }\n  }\n\n  return duckDbTypes;\n}\n\n/**\n * Generates a mapping of column names to their corresponding DuckDB data types.\n * @param columns An array of column descriptions from DuckDB. 
Check getDuckDBColumnTypes.\n * @returns A record where keys are column names and values are their data types.\n */\nexport function getDuckDBColumnTypesMap(columns: DuckDBColumnDesc[]) {\n  return columns.reduce((acc, value) => {\n    acc[value.name] = value.type;\n    return acc;\n  }, {} as Record<string, string>);\n}\n\n/**\n * Quotes a table name for safe SQL usage.\n * Always quotes to handle all edge cases (spaces, special characters, reserved words).\n * For fully qualified names (containing dots), preserves the existing structure.\n * @param tableName The table name to quote.\n * @returns The table name, properly quoted.\n */\nexport function quoteTableName(tableName: string): string {\n  // Return as-is if:\n  // 1. It's already a properly quoted simple identifier (starts and ends with quotes)\n  // 2. It contains both dots and quotes (assume it's a qualified name)\n  if (\n    (tableName.startsWith('\"') && tableName.endsWith('\"')) ||\n    (tableName.includes('.') && tableName.includes('\"'))\n  ) {\n    return tableName;\n  }\n\n  return `\"${tableName.replace(/\"/g, '\"\"')}\"`;\n}\n\n/**\n * Quotes a column name for safe SQL usage.\n * Always quotes to handle all edge cases (spaces, special characters, reserved words).\n * @param columnName The column name to quote.\n * @returns The column name, properly quoted.\n */\nfunction quoteColumnName(columnName: string): string {\n  return `\"${columnName.replace(/\"/g, '\"\"')}\"`;\n}\n\n/**\n * Constructs an SQL query to select all columns from a given table,\n * converting specified columns to Well-Known Binary (WKB) format using ST_AsWKB,\n * and casting BIGINT columns to DOUBLE if specified.\n * @param tableName The name of the table from which to select data.\n * @param columns An array of column descriptors, each with a type and name.\n * @param options Optional parameters to control the conversion behavior.\n * @returns The constructed SQL query.\n */\nexport function castDuckDBTypesForKepler(\n  
tableName: string,\n  columns: DuckDBColumnDesc[],\n  options = {geometryToWKB: true, bigIntToDouble: true}\n): string {\n  const modifiedColumns = columns.map(column => {\n    const {name, type} = column;\n    const quotedColumnName = quoteColumnName(name);\n    if (type === 'GEOMETRY' && options.geometryToWKB) {\n      return `ST_AsWKB(${quotedColumnName}) as ${quotedColumnName}`;\n    } else if (\n      options.bigIntToDouble &&\n      (type === 'BIGINT' ||\n        type === 'UBIGINT' ||\n        type === 'HUGEINT' ||\n        type === 'UHUGEINT' ||\n        type.startsWith('DECIMAL'))\n    ) {\n      // Cast 64-bit and larger integer types and DECIMAL to DOUBLE to avoid BigInt in JS\n      return `CAST(${quotedColumnName} AS DOUBLE) as ${quotedColumnName}`;\n    }\n    return quotedColumnName;\n  });\n\n  const quotedTableName = quoteTableName(tableName);\n  return `SELECT ${modifiedColumns.join(', ')} FROM ${quotedTableName}`;\n}\n\n/**\n * Sets the GeoArrow WKB extension metadata for columns of type GEOMETRY in an Arrow table.\n * @param table The Apache Arrow table whose schema fields will be modified.\n * @param columns An array of column descriptors from a DuckDB table.\n */\nexport function setGeoArrowWKBExtension(table: arrow.Table, columns: DuckDBColumnDesc[]) {\n  table.schema.fields.forEach(field => {\n    const info = columns.find(t => t.name === field.name);\n    if (info?.type === 'GEOMETRY') {\n      field.metadata.set(GEOARROW_METADATA_KEY, GEOARROW_EXTENSIONS.WKB);\n    }\n  });\n}\n\n/**\n * Creates an arrow table from an array of arrow vectors and fields.\n * @param columns An array of arrow vectors.\n * @param fields An array of fields per arrow vector.\n * @param arrowSchema Optional arrow table schema when available.\n * @returns An arrow table.\n */\nexport const restoreArrowTable = (\n  columns: arrow.Vector[],\n  fields: ProtoDatasetField[],\n  arrowSchema?: arrow.Schema\n) => {\n  const creaOpts = {};\n  fields.map((field, index) => {\n 
   creaOpts[field.name] = columns[index];\n  });\n\n  return arrowSchema ? new arrow.Table(arrowSchema, creaOpts) : new arrow.Table(creaOpts);\n};\n\n/**\n * DuckDb throws when geoarrow extensions are present in metadata.\n * @param table An arrow table to clear from extensions.\n * @returns A map of removed per field geoarrow extensions.\n */\nexport const removeUnsupportedExtensions = (table: arrow.Table): Record<string, string> => {\n  const removedMetadata: Record<string, string> = {};\n  table.schema.fields.forEach(field => {\n    const extension = field.metadata.get(GEOARROW_METADATA_KEY);\n    if (extension?.startsWith('geoarrow')) {\n      removedMetadata[field.name] = extension;\n      field.metadata.delete(GEOARROW_METADATA_KEY);\n    }\n  });\n  return removedMetadata;\n};\n\n/**\n * Restore removed metadata extensions after a call to removeUnsupportedExtensions.\n * @param table An arrow table to restore geoarrow extensions.\n * @param removedExtensions A map of per field geoarrow extensions to restore.\n */\nexport const restoreUnsupportedExtensions = (\n  table: arrow.Table,\n  removedExtensions: Record<string, string>\n) => {\n  table.schema.fields.forEach(field => {\n    const extension = removedExtensions[field.name];\n    if (extension) {\n      field.metadata.set(GEOARROW_METADATA_KEY, extension);\n    }\n  });\n};\n\n/** Checks whether the given Apache Arrow JS type is a Point data type */\nexport function isGeoArrowPoint(type: DataType) {\n  if (DataType.isFixedSizeList(type)) {\n    // Check list size\n    if (![2, 3, 4].includes(type.listSize)) {\n      return false;\n    }\n\n    // Check child of FixedSizeList is floating type\n    if (!DataType.isFloat(type.children[0])) {\n      return false;\n    }\n\n    return true;\n  }\n\n  return false;\n}\n\n/** Checks whether the given Apache Arrow JS type is a Point data type */\nexport function isGeoArrowLineString(type: DataType) {\n  // Check the outer type is a List\n  if 
(!DataType.isList(type)) {\n    return false;\n  }\n\n  // Check the child is a point type\n  if (!isGeoArrowPoint(type.children[0].type)) {\n    return false;\n  }\n\n  return true;\n}\n\n/** Checks whether the given Apache Arrow JS type is a Polygon data type */\nexport function isGeoArrowPolygon(type: DataType) {\n  // Check the outer vector is a List\n  if (!DataType.isList(type)) {\n    return false;\n  }\n\n  // Check the child is a linestring vector\n  if (!isGeoArrowLineString(type.children[0].type)) {\n    return false;\n  }\n\n  return true;\n}\n\n/** Checks whether the given Apache Arrow JS type is a Polygon data type */\nexport function isGeoArrowMultiPoint(type: DataType) {\n  // Check the outer vector is a List\n  if (!DataType.isList(type)) {\n    return false;\n  }\n\n  // Check the child is a point vector\n  if (!isGeoArrowPoint(type.children[0].type)) {\n    return false;\n  }\n\n  return true;\n}\n\n/** Checks whether the given Apache Arrow JS type is a Polygon data type */\nexport function isGeoArrowMultiLineString(type: DataType) {\n  // Check the outer vector is a List\n  if (!DataType.isList(type)) {\n    return false;\n  }\n\n  // Check the child is a linestring vector\n  if (!isGeoArrowLineString(type.children[0].type)) {\n    return false;\n  }\n\n  return true;\n}\n\n/** Checks whether the given Apache Arrow JS type is a Polygon data type */\nexport function isGeoArrowMultiPolygon(type: DataType) {\n  // Check the outer vector is a List\n  if (!DataType.isList(type)) {\n    return false;\n  }\n\n  // Check the child is a polygon vector\n  if (!isGeoArrowPolygon(type.children[0].type)) {\n    return false;\n  }\n\n  return true;\n}\n\n/**\n * Checks if the given SQL query is a SELECT query by using the EXPLAIN command.\n * @param connection The DuckDB connection instance.\n * @param query The SQL query to check.\n * @returns Resolves to `true` if the query is a SELECT statement, otherwise `false`.\n */\nexport async function 
checkIsSelectQuery(\n  connection: DatabaseConnection,\n  query: string\n): Promise<boolean> {\n  try {\n    const result = await connection.query(`EXPLAIN (${query})`);\n    return result.numRows > 0;\n  } catch (error) {\n    return false;\n  }\n}\n\n/**\n * Split a string with potentially multiple SQL queries (separated as usual by ';') into an array of queries.\n * This implementation:\n *  - Handles single and double quoted strings with proper escaping\n *  - Ignores semicolons in line comments (--) and block comments (slash asterisk)\n *  - Trims whitespace from queries\n *  - Handles SQL-style escaped quotes ('' inside strings)\n *  - Returns only non-empty queries\n * @param input A string with potentially multiple SQL queries.\n * @returns An array of queries.\n */\nexport function splitSqlStatements(input: string): string[] {\n  const queries: string[] = [];\n  let currentQuery = '';\n  let inSingleQuote = false;\n  let inDoubleQuote = false;\n  let inLineComment = false;\n  let inBlockComment = false;\n\n  for (let i = 0; i < input.length; i++) {\n    const char = input[i];\n\n    if (inLineComment) {\n      currentQuery += char;\n      if (char === '\\n') {\n        inLineComment = false;\n      }\n      continue;\n    }\n\n    if (inBlockComment) {\n      currentQuery += char;\n      if (char === '*' && input[i + 1] === '/') {\n        inBlockComment = false;\n        currentQuery += input[++i]; // Consume '/'\n      }\n      continue;\n    }\n\n    if (inSingleQuote) {\n      currentQuery += char;\n      if (char === \"'\") {\n        // Handle escaped single quotes in SQL\n        if (i + 1 < input.length && input[i + 1] === \"'\") {\n          currentQuery += input[++i];\n        } else {\n          inSingleQuote = false;\n        }\n      }\n      continue;\n    }\n\n    if (inDoubleQuote) {\n      currentQuery += char;\n      if (char === '\"') {\n        // Handle escaped double quotes\n        if (i + 1 < input.length && input[i + 1] === '\"') 
{\n          currentQuery += input[++i];\n        } else {\n          inDoubleQuote = false;\n        }\n      }\n      continue;\n    }\n\n    // Check for comment starts\n    if (char === '-' && input[i + 1] === '-') {\n      inLineComment = true;\n      currentQuery += char + input[++i];\n      continue;\n    }\n\n    if (char === '/' && input[i + 1] === '*') {\n      inBlockComment = true;\n      currentQuery += char + input[++i];\n      continue;\n    }\n\n    // Check for quote starts\n    if (char === \"'\") {\n      inSingleQuote = true;\n      currentQuery += char;\n      continue;\n    }\n\n    if (char === '\"') {\n      inDoubleQuote = true;\n      currentQuery += char;\n      continue;\n    }\n\n    // Handle query separator\n    if (char === ';') {\n      const trimmed = currentQuery.trim();\n      if (trimmed.length > 0) {\n        queries.push(trimmed);\n      }\n      currentQuery = '';\n      continue;\n    }\n\n    currentQuery += char;\n  }\n\n  // Add the final query\n  const trimmed = currentQuery.trim();\n  if (trimmed.length > 0) {\n    queries.push(trimmed);\n  }\n\n  return queries;\n}\n\n/**\n * Removes SQL comments from a given SQL string.\n * @param sql The SQL query string from which comments should be removed.\n * @returns The cleaned SQL string without comments.\n */\nexport function removeSQLComments(sql: string): string {\n  // Remove multi-line comments (/* ... 
*/)\n  sql = sql.replace(/\\/\\*[\\s\\S]*?\\*\\//g, '');\n  // Remove single-line comments (-- ...)\n  sql = sql.replace(/--.*$/gm, '');\n  return sql.trim();\n}\n\n/**\n * Drops a table if it exists in the DuckDB database.\n * @param connection The DuckDB connection instance.\n * @param tableName The name of the table to drop.\n * @returns A promise that resolves when the operation is complete.\n * @throws Logs an error if the table drop operation fails.\n */\nexport const dropTableIfExists = async (connection: DatabaseConnection, tableName: string) => {\n  try {\n    await connection.query(`DROP TABLE IF EXISTS \"${tableName}\";`);\n  } catch (error) {\n    console.error('Dropping table failed', tableName, error);\n  }\n};\n\n/**\n * Imports a file into DuckDB as a table, supporting multiple formats from SUPPORTED_DUCKDB_DROP_EXTENSIONS.\n * @param file The file to be imported.\n * @returns A promise that resolves when the file has been processed into a DuckDB table.\n */\nexport async function tableFromFile(file: File | null): Promise<null | Error> {\n  if (!file) return new Error('File Drag & Drop: No file');\n\n  const fileExt = SUPPORTED_DUCKDB_DROP_EXTENSIONS.find(ext => file.name.endsWith(ext));\n  if (!fileExt) {\n    return new Error(\"File Drag & Drop: File extension isn't supported\");\n  }\n\n  const db = await getApplicationConfig().database;\n  if (!db) {\n    return new Error('The database is not configured properly.');\n  }\n  const c = await db.connect();\n\n  let error: Error | null = null;\n\n  try {\n    const tableName = sanitizeDuckDBTableName(file.name);\n    const sourceName = 'temp_file_handle';\n\n    c.query(`install spatial;\n      load spatial;`);\n\n    if (fileExt === 'arrow') {\n      const arrayBuffer = await file.arrayBuffer();\n      const uint8Array = new Uint8Array(arrayBuffer);\n      const arrowTable = arrow.tableFromIPC(uint8Array);\n\n      await c.insertArrowTable(arrowTable, {name: t