UNPKG

parquetjs-lite

Version:

fully asynchronous, pure JavaScript implementation of the Parquet file format

405 lines (344 loc) 8.74 kB
'use strict'; const NOT_SUPPORTED = () => { throw new Error('Not Supported in Lite version'); }; const PARQUET_LOGICAL_TYPES = { 'BOOLEAN': { primitiveType: 'BOOLEAN', toPrimitive: toPrimitive_BOOLEAN, fromPrimitive: fromPrimitive_BOOLEAN }, 'INT32': { primitiveType: 'INT32', toPrimitive: toPrimitive_INT32 }, 'INT64': { primitiveType: 'INT64', toPrimitive: toPrimitive_INT64 }, 'INT96': { primitiveType: 'INT96', toPrimitive: toPrimitive_INT96 }, 'FLOAT': { primitiveType: 'FLOAT', toPrimitive: toPrimitive_FLOAT }, 'DOUBLE': { primitiveType: 'DOUBLE', toPrimitive: toPrimitive_DOUBLE }, 'BYTE_ARRAY': { primitiveType: 'BYTE_ARRAY', toPrimitive: toPrimitive_BYTE_ARRAY }, 'FIXED_LEN_BYTE_ARRAY': { primitiveType: 'FIXED_LEN_BYTE_ARRAY', toPrimitive: toPrimitive_BYTE_ARRAY }, 'UTF8': { primitiveType: 'BYTE_ARRAY', originalType: 'UTF8', toPrimitive: toPrimitive_UTF8, fromPrimitive: fromPrimitive_UTF8 }, 'ENUM': { primitiveType: 'BYTE_ARRAY', originalType: 'UTF8', toPrimitive: toPrimitive_UTF8, fromPrimitive: fromPrimitive_UTF8 }, 'TIME_MILLIS': { primitiveType: 'INT32', originalType: 'TIME_MILLIS', toPrimitive: toPrimitive_TIME_MILLIS }, 'TIME_MICROS': { primitiveType: 'INT64', originalType: 'TIME_MICROS', toPrimitive: toPrimitive_TIME_MICROS }, 'DATE': { primitiveType: 'INT32', originalType: 'DATE', toPrimitive: toPrimitive_DATE, fromPrimitive: fromPrimitive_DATE }, 'TIMESTAMP_MILLIS': { primitiveType: 'INT64', originalType: 'TIMESTAMP_MILLIS', toPrimitive: toPrimitive_TIMESTAMP_MILLIS, fromPrimitive: fromPrimitive_TIMESTAMP_MILLIS }, 'TIMESTAMP_MICROS': { primitiveType: 'INT64', originalType: 'TIMESTAMP_MICROS', toPrimitive: toPrimitive_TIMESTAMP_MICROS, fromPrimitive: fromPrimitive_TIMESTAMP_MICROS }, 'UINT_8': { primitiveType: 'INT32', originalType: 'UINT_8', toPrimitive: toPrimitive_UINT8 }, 'UINT_16': { primitiveType: 'INT32', originalType: 'UINT_16', toPrimitive: toPrimitive_UINT16 }, 'UINT_32': { primitiveType: 'INT32', originalType: 'UINT_32', toPrimitive: toPrimitive_UINT32 }, 'UINT_64': { primitiveType: 'INT64', originalType: 'UINT_64', toPrimitive: toPrimitive_UINT64 }, 'INT_8': { primitiveType: 'INT32', originalType: 'INT_8', toPrimitive: toPrimitive_INT8 }, 'INT_16': { primitiveType: 'INT32', originalType: 'INT_16', toPrimitive: toPrimitive_INT16 }, 'INT_32': { primitiveType: 'INT32', originalType: 'INT_32', toPrimitive: toPrimitive_INT32 }, 'INT_64': { primitiveType: 'INT64', originalType: 'INT_64', toPrimitive: toPrimitive_INT64 }, 'JSON': { primitiveType: 'BYTE_ARRAY', originalType: 'JSON', toPrimitive: toPrimitive_JSON, fromPrimitive: fromPrimitive_JSON }, 'BSON': { primitiveType: 'BYTE_ARRAY', originalType: 'BSON', toPrimitive: NOT_SUPPORTED, fromPrimitive: NOT_SUPPORTED }, 'INTERVAL': { primitiveType: 'FIXED_LEN_BYTE_ARRAY', originalType: 'INTERVAL', typeLength: 12, toPrimitive: toPrimitive_INTERVAL, fromPrimitive: fromPrimitive_INTERVAL } }; /** * Convert a value from it's native representation to the internal/underlying * primitive type */ function toPrimitive(type, value) { if (!(type in PARQUET_LOGICAL_TYPES)) { throw 'invalid type: ' + type; } return PARQUET_LOGICAL_TYPES[type].toPrimitive(value); } /** * Convert a value from it's internal/underlying primitive representation to * the native representation */ function fromPrimitive(type, value) { if (!(type in PARQUET_LOGICAL_TYPES)) { throw 'invalid type: ' + type; } if ("fromPrimitive" in PARQUET_LOGICAL_TYPES[type]) { return PARQUET_LOGICAL_TYPES[type].fromPrimitive(value); } else { return value; } } function toPrimitive_BOOLEAN(value) { return !!value; } function fromPrimitive_BOOLEAN(value) { return !!value; } function toPrimitive_FLOAT(value) { const v = parseFloat(value); if (isNaN(v)) { throw 'invalid value for FLOAT: ' + value; } return v; } function toPrimitive_DOUBLE(value) { const v = parseFloat(value); if (isNaN(v)) { throw 'invalid value for DOUBLE: ' + value; } return v; } function toPrimitive_INT8(value) { const v = parseInt(value, 10); if (v < -0x80 || v > 0x7f || isNaN(v)) { throw 'invalid value for INT8: ' + value; } return v; } function toPrimitive_UINT8(value) { const v = parseInt(value, 10); if (v < 0 || v > 0xff || isNaN(v)) { throw 'invalid value for UINT8: ' + value; } return v; } function toPrimitive_INT16(value) { const v = parseInt(value, 10); if (v < -0x8000 || v > 0x7fff || isNaN(v)) { throw 'invalid value for INT16: ' + value; } return v; } function toPrimitive_UINT16(value) { const v = parseInt(value, 10); if (v < 0 || v > 0xffff || isNaN(v)) { throw 'invalid value for UINT16: ' + value; } return v; } function toPrimitive_INT32(value) { const v = parseInt(value, 10); if (v < -0x80000000 || v > 0x7fffffff || isNaN(v)) { throw 'invalid value for INT32: ' + value; } return v; } function toPrimitive_UINT32(value) { const v = parseInt(value, 10); if (v < 0 || v > 0xffffffffffff || isNaN(v)) { throw 'invalid value for UINT32: ' + value; } return v; } function toPrimitive_INT64(value) { const v = parseInt(value, 10); if (isNaN(v)) { throw 'invalid value for INT64: ' + value; } return v; } function toPrimitive_UINT64(value) { const v = parseInt(value, 10); if (v < 0 || isNaN(v)) { throw 'invalid value for UINT64: ' + value; } return v; } function toPrimitive_INT96(value) { const v = parseInt(value, 10); if (isNaN(v)) { throw 'invalid value for INT96: ' + value; } return v; } function toPrimitive_BYTE_ARRAY(value) { return Buffer.from(value); } function toPrimitive_UTF8(value) { return Buffer.from(value, 'utf8'); } function fromPrimitive_UTF8(value) { return (value !== undefined && value !== null) ? value.toString() : value; } function toPrimitive_JSON(value) { return Buffer.from(JSON.stringify(value)); } function fromPrimitive_JSON(value) { return JSON.parse(value); } function toPrimitive_TIME_MILLIS(value) { const v = parseInt(value, 10); if (v < 0 || v > 0xffffffffffffffff || isNaN(v)) { throw 'invalid value for TIME_MILLIS: ' + value; } return v; } function toPrimitive_TIME_MICROS(value) { const v = BigInt(value); if (v < 0n || isNaN(v)) { throw 'invalid value for TIME_MICROS: ' + value; } return v; } const kMillisPerDay = 86400000; function toPrimitive_DATE(value) { /* convert from date */ if (value instanceof Date) { return value.getTime() / kMillisPerDay; } /* convert from integer */ { const v = parseInt(value, 10); if (v < 0 || isNaN(v)) { throw 'invalid value for DATE: ' + value; } return v; } } function fromPrimitive_DATE(value) { return new Date(+value * kMillisPerDay); } function toPrimitive_TIMESTAMP_MILLIS(value) { /* convert from date */ if (value instanceof Date) { return value.getTime(); } /* convert from integer */ { const v = parseInt(value, 10); if (v < 0 || isNaN(v)) { throw 'invalid value for TIMESTAMP_MILLIS: ' + value; } return v; } } function fromPrimitive_TIMESTAMP_MILLIS(value) { return new Date(+value); } function toPrimitive_TIMESTAMP_MICROS(value) { /* convert from date */ if (value instanceof Date) { return BigInt(value.getTime()) * 1000n; } /* convert from integer */ { const v = BigInt(value); if (v < 0n /*|| isNaN(v)*/) { throw 'invalid value for TIMESTAMP_MICROS: ' + value; } return v; } } function fromPrimitive_TIMESTAMP_MICROS(value) { return new Date(parseInt(value / 1000n)); } function toPrimitive_INTERVAL(value) { if (!value.months || !value.days || !value.milliseconds) { throw "value for INTERVAL must be object { months: ..., days: ..., milliseconds: ... }"; } let buf = Buffer.alloc(12); buf.writeUInt32LE(value.months, 0); buf.writeUInt32LE(value.days, 4); buf.writeUInt32LE(value.milliseconds, 8); return buf; } function fromPrimitive_INTERVAL(value) { const buf = Buffer.from(value); const months = buf.readUInt32LE(0); const days = buf.readUInt32LE(4); const millis = buf.readUInt32LE(8); return { months: months, days: days, milliseconds: millis }; } module.exports = { PARQUET_LOGICAL_TYPES, toPrimitive, fromPrimitive };