// @severo_tests/hyparquet
// Version:
// Parquet file parser for JavaScript
// 181 lines (171 loc) • 5.45 kB
// JavaScript
/**
 * Decode `count` PLAIN-encoded values of a parquet physical type,
 * starting at `reader.offset` within `reader.view`.
 *
 * Dispatches to the type-specific decoder. A `count` of zero yields an
 * empty array before the type is inspected, so no validation happens then.
 *
 * @param {DataReader} reader - buffer to read data from
 * @param {ParquetType} type - parquet type of the data
 * @param {number} count - number of values to read
 * @param {number | undefined} fixedLength - length of each fixed length byte array
 * @returns {DecodedArray} array of values
 * @throws {Error} if `type` is FIXED_LEN_BYTE_ARRAY and `fixedLength` is falsy,
 *   or if `type` is not one of the handled types
 */
export function readPlain(reader, type, count, fixedLength) {
  // Nothing to decode: short-circuit regardless of the requested type.
  if (count === 0) return []

  switch (type) {
    case 'BOOLEAN':
      return readPlainBoolean(reader, count)
    case 'INT32':
      return readPlainInt32(reader, count)
    case 'INT64':
      return readPlainInt64(reader, count)
    case 'INT96':
      return readPlainInt96(reader, count)
    case 'FLOAT':
      return readPlainFloat(reader, count)
    case 'DOUBLE':
      return readPlainDouble(reader, count)
    case 'BYTE_ARRAY':
      return readPlainByteArray(reader, count)
    case 'FIXED_LEN_BYTE_ARRAY':
      // The element width is not stored in the data; the caller must supply it.
      if (!fixedLength) throw new Error('parquet missing fixed length')
      return readPlainByteArrayFixed(reader, count, fixedLength)
    default:
      throw new Error(`parquet unhandled type: ${type}`)
  }
}
/**
 * Decode `count` bit-packed booleans.
 *
 * Values are packed eight per byte, least-significant bit first. The reader
 * is advanced by the number of whole bytes touched (`ceil(count / 8)`).
 *
 * @param {DataReader} reader
 * @param {number} count
 * @returns {boolean[]}
 */
function readPlainBoolean(reader, count) {
  const flags = new Array(count)
  for (let index = 0; index < count; index++) {
    // Locate the byte holding this bit, then shift the bit down to position 0.
    const packed = reader.view.getUint8(reader.offset + Math.floor(index / 8))
    const bit = index % 8
    flags[index] = ((packed >> bit) & 1) === 1
  }
  // The offset moves only after every bit has been read.
  reader.offset += Math.ceil(count / 8)
  return flags
}
/**
 * Decode `count` 32-bit signed integers.
 *
 * When the absolute position in the underlying buffer is a multiple of 4 the
 * result is a view over that buffer; otherwise the bytes are first copied
 * into a fresh buffer via `align`.
 *
 * @param {DataReader} reader
 * @param {number} count
 * @returns {Int32Array}
 */
function readPlainInt32(reader, count) {
  const { buffer, byteOffset } = reader.view
  const start = byteOffset + reader.offset
  const byteLength = count * 4

  let values
  if (start % 4 === 0) {
    values = new Int32Array(buffer, start, count)
  } else {
    // Typed-array views need an offset aligned to the element size.
    values = new Int32Array(align(buffer, start, byteLength))
  }

  reader.offset += byteLength
  return values
}
/**
 * Decode `count` 64-bit signed integers.
 *
 * When the absolute position in the underlying buffer is a multiple of 8 the
 * result is a view over that buffer; otherwise the bytes are first copied
 * into a fresh buffer via `align`.
 *
 * @param {DataReader} reader
 * @param {number} count
 * @returns {BigInt64Array}
 */
function readPlainInt64(reader, count) {
  const { buffer, byteOffset } = reader.view
  const start = byteOffset + reader.offset
  const byteLength = count * 8

  let values
  if (start % 8 === 0) {
    values = new BigInt64Array(buffer, start, count)
  } else {
    // Typed-array views need an offset aligned to the element size.
    values = new BigInt64Array(align(buffer, start, byteLength))
  }

  reader.offset += byteLength
  return values
}
/**
 * Read `count` int96 values.
 *
 * Each value occupies 12 bytes: the low 64 bits come first (little-endian),
 * followed by the high 32 bits as a signed little-endian integer. The two
 * parts are combined into a single signed bigint.
 *
 * @param {DataReader} reader
 * @param {number} count
 * @returns {bigint[]}
 */
function readPlainInt96(reader, count) {
  const values = new Array(count)
  for (let i = 0; i < count; i++) {
    const base = reader.offset + i * 12
    // The low word must be read as unsigned: a negative bigint has all of its
    // upper bits set, so OR-ing it in would wipe out the high word whenever
    // bit 63 of the low word is 1.
    const low = reader.view.getBigUint64(base, true)
    // Only the high word carries the sign of the 96-bit value.
    const high = reader.view.getInt32(base + 8, true)
    values[i] = (BigInt(high) << 64n) | low
  }
  reader.offset += count * 12
  return values
}
/**
 * Decode `count` 32-bit floating point values.
 *
 * When the absolute position in the underlying buffer is a multiple of 4 the
 * result is a view over that buffer; otherwise the bytes are first copied
 * into a fresh buffer via `align`.
 *
 * @param {DataReader} reader
 * @param {number} count
 * @returns {Float32Array}
 */
function readPlainFloat(reader, count) {
  const { buffer, byteOffset } = reader.view
  const start = byteOffset + reader.offset
  const byteLength = count * 4

  let values
  if (start % 4 === 0) {
    values = new Float32Array(buffer, start, count)
  } else {
    // Typed-array views need an offset aligned to the element size.
    values = new Float32Array(align(buffer, start, byteLength))
  }

  reader.offset += byteLength
  return values
}
/**
 * Decode `count` 64-bit floating point values.
 *
 * When the absolute position in the underlying buffer is a multiple of 8 the
 * result is a view over that buffer; otherwise the bytes are first copied
 * into a fresh buffer via `align`.
 *
 * @param {DataReader} reader
 * @param {number} count
 * @returns {Float64Array}
 */
function readPlainDouble(reader, count) {
  const { buffer, byteOffset } = reader.view
  const start = byteOffset + reader.offset
  const byteLength = count * 8

  let values
  if (start % 8 === 0) {
    values = new Float64Array(buffer, start, count)
  } else {
    // Typed-array views need an offset aligned to the element size.
    values = new Float64Array(align(buffer, start, byteLength))
  }

  reader.offset += byteLength
  return values
}
/**
 * Decode `count` variable-length byte arrays.
 *
 * Each entry is a 4-byte little-endian unsigned length followed by that many
 * bytes. The returned arrays are views over the reader's underlying buffer,
 * not copies.
 *
 * @param {DataReader} reader
 * @param {number} count
 * @returns {Uint8Array[]}
 */
function readPlainByteArray(reader, count) {
  const { view } = reader
  const chunks = new Array(count)
  for (let index = 0; index < count; index++) {
    const size = view.getUint32(reader.offset, true)
    // Step past the length prefix before slicing out the payload.
    reader.offset += 4
    const payload = new Uint8Array(view.buffer, view.byteOffset + reader.offset, size)
    reader.offset += size
    chunks[index] = payload
  }
  return chunks
}
/**
 * Decode `count` byte arrays that all share the same length.
 *
 * The returned arrays are views over the reader's underlying buffer, not
 * copies. The reader advances by `fixedLength` per value.
 *
 * @param {DataReader} reader
 * @param {number} count
 * @param {number} fixedLength
 * @returns {Uint8Array[]}
 */
function readPlainByteArrayFixed(reader, count, fixedLength) {
  // Precondition (not enforced here): at least count * fixedLength bytes
  // remain, i.e. reader.view.byteLength - reader.offset >= count * fixedLength.
  const { buffer, byteOffset } = reader.view
  const chunks = new Array(count)
  for (let index = 0; index < count; index++) {
    const start = byteOffset + reader.offset
    chunks[index] = new Uint8Array(buffer, start, fixedLength)
    reader.offset += fixedLength
  }
  return chunks
}
/**
 * Copy `size` bytes starting at `offset` of `buffer` into a newly allocated
 * ArrayBuffer, so the data begins at byte 0 of the result.
 *
 * @import {DataReader, DecodedArray, ParquetType} from '../src/types.d.ts'
 * @param {ArrayBufferLike} buffer
 * @param {number} offset
 * @param {number} size
 * @returns {ArrayBuffer}
 */
function align(buffer, offset, size) {
  const target = new Uint8Array(new ArrayBuffer(size))
  const source = new Uint8Array(buffer, offset, size)
  target.set(source)
  return target.buffer
}