@loaders.gl/geopackage
Version:
GeoPackage data loaders
501 lines (438 loc) • 14.7 kB
text/typescript
/* eslint-disable camelcase, @typescript-eslint/no-use-before-define */
import {isBrowser} from '@loaders.gl/loader-utils';
import type {
Schema,
Field,
Geometry,
DataType,
Tables,
GeoJSONTable,
Feature
} from '@loaders.gl/schema';
import {convertWKBToGeometry, transformGeoJsonCoords} from '@loaders.gl/gis';
import {Proj4Projection} from '@math.gl/proj4';
import initSqlJs, {SqlJsStatic, Database, Statement} from 'sql.js';
import type {GeoPackageLoaderOptions} from '../geopackage-loader';
import {
GeometryColumnsRow,
ContentsRow,
SpatialRefSysRow,
ProjectionMapping,
GeometryBitFlags,
DataColumnsRow,
DataColumnsMapping,
PragmaTableInfoRow,
SQLiteTypes,
GeoPackageGeometryTypes
} from './types';
const SQL_JS_VERSION = '1.8.0';
/**
* We pin to the same version as sql.js that we use.
* As of March 2022, versions 1.6.0, 1.6.1, and 1.6.2 of sql.js appeared not to work.
*/
export const DEFAULT_SQLJS_CDN = isBrowser
? `https://cdnjs.cloudflare.com/ajax/libs/sql.js/${SQL_JS_VERSION}/`
: null;
// https://www.geopackage.org/spec121/#flags_layout
const ENVELOPE_BYTE_LENGTHS = {
0: 0,
1: 32,
2: 48,
3: 48,
4: 64,
// values 5-7 are invalid and _should_ never show up
5: 0,
6: 0,
7: 0
};
// Documentation: https://www.geopackage.org/spec130/index.html#table_column_data_types
const SQL_TYPE_MAPPING: Record<SQLiteTypes | GeoPackageGeometryTypes, DataType> = {
BOOLEAN: 'bool',
TINYINT: 'int8',
SMALLINT: 'int16',
MEDIUMINT: 'int32',
INT: 'int32',
INTEGER: 'int32',
FLOAT: 'float32',
DOUBLE: 'float64',
REAL: 'float64',
TEXT: 'utf8',
BLOB: 'binary',
DATE: 'utf8',
DATETIME: 'utf8',
GEOMETRY: 'binary',
POINT: 'binary',
LINESTRING: 'binary',
POLYGON: 'binary',
MULTIPOINT: 'binary',
MULTILINESTRING: 'binary',
MULTIPOLYGON: 'binary',
GEOMETRYCOLLECTION: 'binary'
};
export async function parseGeoPackage(
arrayBuffer: ArrayBuffer,
options?: GeoPackageLoaderOptions
): Promise<GeoJSONTable | Tables<GeoJSONTable>> {
const {sqlJsCDN = DEFAULT_SQLJS_CDN} = options?.geopackage || {};
const {reproject = false, _targetCrs = 'WGS84'} = options?.gis || {};
const db = await loadDatabase(arrayBuffer, sqlJsCDN);
const tables = listVectorTables(db);
const projections = getProjections(db);
const selectedTable = tables.find((table) => table.table_name === options?.geopackage?.table);
const tableName = selectedTable ? selectedTable.table_name : tables[0].table_name;
const shape = options?.geopackage?.shape;
switch (shape) {
case 'geojson-table':
return getVectorTable(db, tableName, projections, {
reproject,
_targetCrs
});
case 'tables':
// Mapping from tableName to geojson feature collection
const outputTables: Tables<GeoJSONTable> = {
shape: 'tables',
tables: []
};
for (const table of tables) {
const {table_name: tableName} = table;
outputTables.tables.push({
name: tableName,
table: getVectorTable(db, tableName, projections, {
reproject,
_targetCrs
})
});
}
return outputTables;
default:
throw new Error(shape);
}
}
/**
* Initialize SQL.js and create database
*
* @param arrayBuffer input bytes
* @return SQL.js database object
*/
async function loadDatabase(arrayBuffer: ArrayBuffer, sqlJsCDN: string | null): Promise<Database> {
// In Node, `locateFile` must not be passed
let SQL: SqlJsStatic;
if (sqlJsCDN) {
SQL = await initSqlJs({
locateFile: (file) => `${sqlJsCDN}${file}`
});
} else {
SQL = await initSqlJs();
}
return new SQL.Database(new Uint8Array(arrayBuffer));
}
/**
* Find all vector tables in GeoPackage
* This queries the `gpkg_contents` table to find a list of vector tables
*
* @param db GeoPackage to query
* @return list of table references
*/
function listVectorTables(db: Database): ContentsRow[] {
// The gpkg_contents table can have at least three categorical values for
// data_type.
// - 'features' refers to a vector geometry table
// (https://www.geopackage.org/spec121/#_contents_2)
// - 'tiles' refers to a raster table
// (https://www.geopackage.org/spec121/#_contents_3)
// - 'attributes' refers to a data table with no geometry
// (https://www.geopackage.org/spec121/#_contents_4).
// We hard code 'features' because for now we don't support raster data or pure attribute data
// eslint-disable-next-line quotes
const stmt = db.prepare("SELECT * FROM gpkg_contents WHERE data_type='features';");
const vectorTablesInfo: ContentsRow[] = [];
while (stmt.step()) {
const vectorTableInfo = stmt.getAsObject() as unknown as ContentsRow;
vectorTablesInfo.push(vectorTableInfo);
}
return vectorTablesInfo;
}
/**
* Load geometries from vector table
*
* @param db GeoPackage object
* @param tableName name of vector table to query
* @param projections keys are srs_id values, values are WKT strings
* @returns Array of GeoJSON Feature objects
*/
function getVectorTable(
db: Database,
tableName: string,
projections: ProjectionMapping,
{reproject, _targetCrs}: {reproject: boolean; _targetCrs: string}
): GeoJSONTable {
const dataColumns = getDataColumns(db, tableName);
const geomColumn = getGeometryColumn(db, tableName);
const featureIdColumn = getFeatureIdName(db, tableName);
// Get vector features from table
// Don't think it's possible to parameterize the table name in SQLite?
const {columns, values} = db.exec(`SELECT * FROM \`${tableName}\`;`)[0];
let projection;
if (reproject) {
const geomColumnProjStr = projections[geomColumn.srs_id];
projection = new Proj4Projection({
from: geomColumnProjStr,
to: _targetCrs
});
}
const geojsonFeatures: Feature<Geometry | null>[] = [];
for (const row of values) {
const geojsonFeature = constructGeoJsonFeature(
columns,
row,
geomColumn,
// @ts-ignore
dataColumns,
featureIdColumn
);
geojsonFeatures.push(geojsonFeature);
}
const schema = getSchema(db, tableName);
if (projection) {
return {
shape: 'geojson-table',
type: 'FeatureCollection',
// @ts-expect-error TODO - null geometries causing problems...
features: transformGeoJsonCoords(geojsonFeatures, projection.project),
schema
};
}
return {
shape: 'geojson-table',
schema,
type: 'FeatureCollection',
// @ts-expect-error TODO - null features
features: geojsonFeatures
};
}
/**
* Find all projections defined in GeoPackage
* This queries the gpkg_spatial_ref_sys table
* @param db GeoPackage object
* @returns mapping from srid to WKT projection string
*/
function getProjections(db: Database): ProjectionMapping {
// Query gpkg_spatial_ref_sys to get srid: srtext mappings
const stmt = db.prepare('SELECT * FROM gpkg_spatial_ref_sys;');
const projectionMapping: ProjectionMapping = {};
while (stmt.step()) {
const srsInfo = stmt.getAsObject() as unknown as SpatialRefSysRow;
const {srs_id, definition} = srsInfo;
projectionMapping[srs_id] = definition;
}
return projectionMapping;
}
/**
* Construct single GeoJSON feature given row's data
* @param columns array of ordered column identifiers
* @param row array of ordered values representing row's data
* @param geomColumn geometry column metadata
* @param dataColumns mapping from table column names to property name
* @returns GeoJSON Feature object
*/
function constructGeoJsonFeature(
columns: string[],
row: any[],
geomColumn: GeometryColumnsRow,
dataColumns: DataColumnsMapping,
featureIdColumn: string
): Feature<Geometry | null> {
// Find feature id
const idIdx = columns.indexOf(featureIdColumn);
const id = row[idIdx];
// Parse geometry columns to geojson
const geomColumnIdx = columns.indexOf(geomColumn.column_name);
const geometry = parseGeometry(row[geomColumnIdx].buffer);
const properties = {};
if (dataColumns) {
for (const [key, value] of Object.entries(dataColumns)) {
const idx = columns.indexOf(key);
// @ts-ignore TODO - Check what happens if null?
properties[value] = row[idx];
}
} else {
// Put all columns except for the feature id and geometry in properties
for (let i = 0; i < columns.length; i++) {
if (i === idIdx || i === geomColumnIdx) {
// eslint-disable-next-line no-continue
continue;
}
const columnName = columns[i];
properties[columnName] = row[i];
}
}
return {
id,
type: 'Feature',
geometry,
properties
};
}
/**
* Get GeoPackage version from database
* @param db database
* @returns version string. One of '1.0', '1.1', '1.2'
*/
// @ts-ignore
// eslint-disable-next-line @typescript-eslint/no-unused-vars
function getGeopackageVersion(db: Database): string | null {
const textDecoder = new TextDecoder();
// Read application id from SQLite metadata
const applicationIdQuery = db.exec('PRAGMA application_id;')[0];
const applicationId = applicationIdQuery.values[0][0];
// Convert 4-byte signed int32 application id to text
const buffer = new ArrayBuffer(4);
const view = new DataView(buffer);
view.setInt32(0, Number(applicationId));
const versionString = textDecoder.decode(buffer);
if (versionString === 'GP10') {
return '1.0';
}
if (versionString === 'GP11') {
return '1.1';
}
// If versionString is GPKG, then read user_version
const userVersionQuery = db.exec('PRAGMA user_version;')[0];
const userVersionInt = userVersionQuery.values[0][0];
if (userVersionInt && typeof userVersionInt === 'number' && userVersionInt < 10300) {
return '1.2';
}
return null;
}
/**
* Find name of feature id column in table
* The feature ID is the primary key of the table.
* http://www.geopackage.org/spec121/#feature_user_tables
*
* @param db database
* @param tableName name of table
* @return name of feature id column
*/
function getFeatureIdName(db: Database, tableName: string): string | null {
// Again, not possible to parameterize table name?
const stmt = db.prepare(`PRAGMA table_info(\`${tableName}\`)`);
while (stmt.step()) {
const pragmaTableInfo = stmt.getAsObject() as unknown as PragmaTableInfoRow;
const {name, pk} = pragmaTableInfo;
if (pk) {
return name;
}
}
// Is it guaranteed for there always to be at least one primary key column in the table?
return null;
}
/**
* Parse geometry buffer
* GeoPackage vector geometries are slightly extended past the WKB standard
* See: https://www.geopackage.org/spec121/#gpb_format
*
* @param arrayBuffer geometry buffer
* @return GeoJSON geometry (in original CRS)
*/
function parseGeometry(arrayBuffer: ArrayBuffer): Geometry | null {
const view = new DataView(arrayBuffer);
const {envelopeLength, emptyGeometry} = parseGeometryBitFlags(view.getUint8(3));
// A Feature object has a member with the name "geometry". The value of the
// geometry member SHALL be either a Geometry object as defined above or, in
// the case that the Feature is unlocated, a JSON null value.
/** @see https://tools.ietf.org/html/rfc7946#section-3.2 */
if (emptyGeometry) {
return null;
}
// Do I need to find the srid here? Is it necessarily the same for every
// geometry in a table?
// const srid = view.getInt32(4, littleEndian);
// 2 byte magic, 1 byte version, 1 byte flags, 4 byte int32 srid
const wkbOffset = 8 + envelopeLength;
const wkbBytes = arrayBuffer.slice(wkbOffset);
return convertWKBToGeometry(wkbBytes);
}
/**
* Parse geometry header flags
* https://www.geopackage.org/spec121/#flags_layout
*
* @param byte uint8 number representing flags
* @return object representing information from bit flags
*/
function parseGeometryBitFlags(byte: number): GeometryBitFlags {
// Are header values little endian?
const envelopeValue = (byte & 0b00001110) / 2;
// TODO: Not sure the best way to handle this. Throw an error if envelopeValue outside 0-7?
const envelopeLength = ENVELOPE_BYTE_LENGTHS[envelopeValue] as number;
return {
littleEndian: Boolean(byte & 0b00000001),
envelopeLength,
emptyGeometry: Boolean(byte & 0b00010000),
extendedGeometryType: Boolean(byte & 0b00100000)
};
}
/**
* Find geometry column in given vector table
*
* @param db GeoPackage object
* @param tableName Name of vector table
* @returns Array of geometry column definitions
*/
function getGeometryColumn(db: Database, tableName: string): GeometryColumnsRow {
const stmt = db.prepare('SELECT * FROM gpkg_geometry_columns WHERE table_name=:tableName;');
stmt.bind({':tableName': tableName});
// > Requirement 30
// > A feature table SHALL have only one geometry column.
// https://www.geopackage.org/spec121/#feature_user_tables
// So we should need one and only one step, given that we use the WHERE clause in the SQL query
// above
stmt.step();
const geometryColumn = stmt.getAsObject() as unknown as GeometryColumnsRow;
return geometryColumn;
}
/**
* Find property columns in given vector table
* @param db GeoPackage object
* @param tableName Name of vector table
* @returns Mapping from table column names to property name
*/
function getDataColumns(db: Database, tableName: string): DataColumnsMapping | null {
// gpkg_data_columns is not required to exist
// https://www.geopackage.org/spec121/#extension_schema
let stmt: Statement;
try {
stmt = db.prepare('SELECT * FROM gpkg_data_columns WHERE table_name=:tableName;');
} catch (error) {
if ((error as Error).message.includes('no such table')) {
return null;
}
throw error;
}
stmt.bind({':tableName': tableName});
// Convert DataColumnsRow object this to a key-value {column_name: name}
const result: DataColumnsMapping = {};
while (stmt.step()) {
const column = stmt.getAsObject() as unknown as DataColumnsRow;
const {column_name, name} = column;
result[column_name] = name || null;
}
return result;
}
/**
* Get arrow schema
* @param db GeoPackage object
* @param tableName table name
* @returns Arrow-like Schema
*/
function getSchema(db: Database, tableName: string): Schema {
const stmt = db.prepare(`PRAGMA table_info(\`${tableName}\`)`);
const fields: Field[] = [];
while (stmt.step()) {
const pragmaTableInfo = stmt.getAsObject() as unknown as PragmaTableInfoRow;
const {name, type: sqlType, notnull} = pragmaTableInfo;
const type = SQL_TYPE_MAPPING[sqlType];
const field = {name, type, nullable: !notnull};
fields.push(field);
}
return {fields, metadata: {}};
}