UNPKG

@loaders.gl/schema

Version:

Table format APIs for JSON, CSV, etc...

351 lines (350 loc) 12.9 kB
// loaders.gl // SPDX-License-Identifier: MIT // Copyright (c) vis.gl contributors export function isTable(table) { const shape = typeof table === 'object' && table?.shape; switch (shape) { case 'array-row-table': case 'object-row-table': return Array.isArray(table.data); case 'geojson-table': return Array.isArray(table.features); case 'columnar-table': return table.data && typeof table.data === 'object'; case 'arrow-table': return Boolean(table?.data?.numRows !== undefined); default: return false; } } /** * Returns the length of the table (i.e. the number of rows) */ export function getTableLength(table) { switch (table.shape) { case 'array-row-table': case 'object-row-table': return table.data.length; case 'geojson-table': return table.features.length; case 'arrow-table': const arrowTable = table.data; return arrowTable.numRows; case 'columnar-table': for (const column of Object.values(table.data)) { return column.length || 0; } return 0; default: throw new Error('table'); } } /** * Returns the number of columns in the table * @throws Fails to deduce number of columns if the table has no schema and is empty */ export function getTableNumCols(table) { if (table.schema) { return table.schema.fields.length; } if (getTableLength(table) === 0) { throw new Error('empty table'); } switch (table.shape) { case 'array-row-table': return table.data[0].length; case 'object-row-table': return Object.keys(table.data[0]).length; case 'geojson-table': return Object.keys(table.features[0]).length; case 'columnar-table': return Object.keys(table.data).length; case 'arrow-table': const arrowTable = table.data; return arrowTable.numCols; default: throw new Error('table'); } } /** Get a table cell value at row index and column name */ export function getTableCell(table, rowIndex, columnName) { switch (table.shape) { case 'array-row-table': const columnIndex = getTableColumnIndex(table, columnName); return table.data[rowIndex][columnIndex]; case 'object-row-table': return table.data[rowIndex][columnName]; case 'geojson-table': return table.features[rowIndex][columnName]; case 'columnar-table': const column = table.data[columnName]; return column[rowIndex]; case 'arrow-table': const arrowTable = table.data; const arrowColumnIndex = arrowTable.schema.fields.findIndex((field) => field.name === columnName); return arrowTable.getChildAt(arrowColumnIndex)?.get(rowIndex); default: throw new Error('todo'); } } /** Get a table cell value at row index and column name */ export function getTableCellAt(table, rowIndex, columnIndex) { switch (table.shape) { case 'array-row-table': return table.data[rowIndex][columnIndex]; case 'object-row-table': const columnName1 = getTableColumnName(table, columnIndex); return table.data[rowIndex][columnName1]; case 'geojson-table': const columnName2 = getTableColumnName(table, columnIndex); return table.features[rowIndex][columnName2]; case 'columnar-table': const columnName3 = getTableColumnName(table, columnIndex); const column = table.data[columnName3]; return column[rowIndex]; case 'arrow-table': const arrowTable = table.data; return arrowTable.getChildAt(columnIndex)?.get(rowIndex); default: throw new Error('todo'); } } /** Deduce the table row shape */ export function getTableRowShape(table) { switch (table.shape) { case 'array-row-table': case 'object-row-table': return table.shape; case 'geojson-table': // TODO - this is not correct, geojson-table is not a row table return 'object-row-table'; case 'columnar-table': default: throw new Error('Not a row table'); } } /** Get the index of a named table column. Requires the table to have a schema */ export function getTableColumnIndex(table, columnName) { const columnIndex = table.schema?.fields.findIndex((field) => field.name === columnName); if (columnIndex === undefined) { throw new Error(columnName); } return columnIndex; } /** Get the name of a table column by index. Requires the table to have a schema */ export function getTableColumnName(table, columnIndex) { const columnName = table.schema?.fields[columnIndex]?.name; if (!columnName) { throw new Error(`${columnIndex}`); } return columnName; } /** * Returns one row of the table in object format. * @param target Optional parameter will be used if needed to store the row. Can be reused between calls to improve performance * @returns an array representing the row. May be the original array in the row, a new object, or the target parameter */ // eslint-disable-next-line complexity export function getTableRowAsObject(table, rowIndex, target, copy) { switch (table.shape) { case 'object-row-table': return copy ? Object.fromEntries(Object.entries(table.data[rowIndex])) : table.data[rowIndex]; case 'array-row-table': if (table.schema) { const objectRow = target || {}; for (let i = 0; i < table.schema.fields.length; i++) { objectRow[table.schema.fields[i].name] = table.data[rowIndex][i]; } return objectRow; } throw new Error('no schema'); case 'geojson-table': if (table.schema) { const objectRow = target || {}; // TODO - should lift properties to top level for (let i = 0; i < table.schema.fields.length; i++) { objectRow[table.schema.fields[i].name] = table.features[rowIndex][i]; } return objectRow; } throw new Error('no schema'); case 'columnar-table': if (table.schema) { const objectRow = target || {}; for (let i = 0; i < table.schema.fields.length; i++) { objectRow[table.schema.fields[i].name] = table.data[table.schema.fields[i].name][rowIndex]; } return objectRow; } else { // eslint-disable-line no-else-return const objectRow = target || {}; for (const [name, column] of Object.entries(table.data)) { objectRow[name] = column[rowIndex]; } return objectRow; } case 'arrow-table': const arrowTable = table.data; const objectRow = target || {}; const row = arrowTable.get(rowIndex); const schema = arrowTable.schema; for (let i = 0; i < schema.fields.length; i++) { objectRow[schema.fields[i].name] = row?.[schema.fields[i].name]; } return objectRow; default: throw new Error('shape'); } } /** * Returns one row of the table in array format. * @param target Optional parameter will be used if needed to store the row. Can be reused between calls to improve performance. * @returns an array representing the row. May be the original array in the row, a new object, or the target parameter */ // eslint-disable-next-line complexity export function getTableRowAsArray(table, rowIndex, target, copy) { switch (table.shape) { case 'array-row-table': return copy ? Array.from(table.data[rowIndex]) : table.data[rowIndex]; case 'object-row-table': if (table.schema) { const arrayRow = target || []; for (let i = 0; i < table.schema.fields.length; i++) { arrayRow[i] = table.data[rowIndex][table.schema.fields[i].name]; } return arrayRow; } // Warning: just slap on the values, this risks mismatches between rows return Object.values(table.data[rowIndex]); case 'geojson-table': if (table.schema) { const arrayRow = target || []; // TODO - should lift properties to top level for (let i = 0; i < table.schema.fields.length; i++) { arrayRow[i] = table.features[rowIndex][table.schema.fields[i].name]; } return arrayRow; } // Warning: just slap on the values, this risks mismatches between rows return Object.values(table.features[rowIndex]); case 'columnar-table': if (table.schema) { const arrayRow = target || []; for (let i = 0; i < table.schema.fields.length; i++) { arrayRow[i] = table.data[table.schema.fields[i].name][rowIndex]; } return arrayRow; } else { // eslint-disable-line no-else-return const arrayRow = target || []; let i = 0; for (const column of Object.values(table.data)) { arrayRow[i] = column[rowIndex]; i++; } return arrayRow; } case 'arrow-table': const arrowTable = table.data; const arrayRow = target || []; const row = arrowTable.get(rowIndex); const schema = arrowTable.schema; for (let i = 0; i < schema.fields.length; i++) { arrayRow[i] = row?.[schema.fields[i].name]; } return arrayRow; default: throw new Error('shape'); } } /** Convert any table into array row format */ export function makeArrayRowTable(table) { if (table.shape === 'array-row-table') { return table; } const length = getTableLength(table); const data = new Array(length); for (let rowIndex = 0; rowIndex < length; rowIndex++) { data[rowIndex] = getTableRowAsArray(table, rowIndex); } return { shape: 'array-row-table', schema: table.schema, data }; } /** Convert any table into object row format */ export function makeObjectRowTable(table) { if (table.shape === 'object-row-table') { return table; } const length = getTableLength(table); const data = new Array(length); for (let rowIndex = 0; rowIndex < length; rowIndex++) { data[rowIndex] = getTableRowAsObject(table, rowIndex); } return { shape: 'object-row-table', schema: table.schema, data }; } /** Convert any table into object row format */ export function makeColumnarTable(table) { if (table.shape === 'object-row-table') { return table; } const length = getTableLength(table); const data = new Array(length); for (let rowIndex = 0; rowIndex < length; rowIndex++) { data[rowIndex] = getTableRowAsObject(table, rowIndex); } return { shape: 'object-row-table', schema: table.schema, data }; } // Row Iterators /** * Iterate over table rows * @param table * @param shape */ export function* makeRowIterator(table, shape) { switch (shape) { case 'array-row-table': yield* makeArrayRowIterator(table); break; case 'object-row-table': yield* makeObjectRowIterator(table); break; default: throw new Error(`Unknown row type ${shape}`); } } /** * Streaming processing: Iterate over table, yielding array rows * @param table * @param shape */ export function* makeArrayRowIterator(table, target = []) { const length = getTableLength(table); for (let rowIndex = 0; rowIndex < length; rowIndex++) { yield getTableRowAsArray(table, rowIndex, target); } } /** * Streaming processing: Iterate over table, yielding object rows * @param table * @param shape */ export function* makeObjectRowIterator(table, target = {}) { const length = getTableLength(table); for (let rowIndex = 0; rowIndex < length; rowIndex++) { yield getTableRowAsObject(table, rowIndex, target); } }