UNPKG

hyparquet-writer

Version:

Parquet file writer for JavaScript

136 lines (123 loc) 3.89 kB
/**
 * Write an array of values to the writer in plain (unencoded) form,
 * dispatching on the parquet physical type.
 *
 * @import {DecodedArray, ParquetType} from 'hyparquet'
 * @import {Writer} from '../src/types.js'
 * @param {Writer} writer destination that receives the appended values
 * @param {DecodedArray} values values to write, all of the given type
 * @param {ParquetType} type parquet physical type of the values
 * @param {number | undefined} fixedLength byte length of each value, required for FIXED_LEN_BYTE_ARRAY
 * @throws {Error} if the type is unsupported, fixedLength is missing for
 *   FIXED_LEN_BYTE_ARRAY, or a value does not match the type
 */
export function writePlain(writer, values, type, fixedLength) {
  switch (type) {
    case 'BOOLEAN':
      writePlainBoolean(writer, values)
      break
    case 'INT32':
      writePlainInt32(writer, values)
      break
    case 'INT64':
      writePlainInt64(writer, values)
      break
    case 'FLOAT':
      writePlainFloat(writer, values)
      break
    case 'DOUBLE':
      writePlainDouble(writer, values)
      break
    case 'BYTE_ARRAY':
      writePlainByteArray(writer, values)
      break
    case 'FIXED_LEN_BYTE_ARRAY':
      // a missing (or zero) length cannot describe a fixed-width value
      if (!fixedLength) throw new Error('parquet FIXED_LEN_BYTE_ARRAY expected type_length')
      writePlainByteArrayFixed(writer, values, fixedLength)
      break
    default:
      throw new Error(`parquet unsupported type: ${type}`)
  }
}

/**
 * Write booleans bit-packed, eight values per byte. The first value of each
 * group of eight occupies the least-significant bit.
 *
 * @param {Writer} writer
 * @param {DecodedArray} values
 * @throws {Error} if any value is not a boolean
 */
function writePlainBoolean(writer, values) {
  let currentByte = 0
  for (let i = 0; i < values.length; i++) {
    const value = values[i]
    if (typeof value !== 'boolean') throw new Error('parquet expected boolean value, got ' + value)
    const bitOffset = i % 8
    if (value) {
      currentByte |= 1 << bitOffset
    }
    // the byte is full once its eighth bit has been assigned: flush and reset
    if (bitOffset === 7) {
      writer.appendUint8(currentByte)
      currentByte = 0
    }
  }
  // a length that is not a multiple of 8 leaves a partially filled byte;
  // its unused high bits stay zero
  if (values.length % 8) {
    writer.appendUint8(currentByte)
  }
}

/**
 * Write each value with writer.appendInt32 after validating it.
 *
 * @param {Writer} writer
 * @param {DecodedArray} values
 * @throws {Error} if any value is not an integer within the signed 32-bit range
 */
function writePlainInt32(writer, values) {
  for (const value of values) {
    // rejects non-numbers (including bigint), NaN, infinities and fractions
    if (!Number.isSafeInteger(value)) throw new Error('parquet expected integer value, got ' + value)
    if (value < -2147483648 || value > 2147483647) throw new Error('parquet expected int32 value, got ' + value)
    writer.appendInt32(value)
  }
}

/**
 * Write each value with writer.appendInt64. Values must already be bigints;
 * plain numbers are rejected rather than converted.
 *
 * @param {Writer} writer
 * @param {DecodedArray} values
 * @throws {Error} if any value is not a bigint
 */
function writePlainInt64(writer, values) {
  for (const value of values) {
    if (typeof value !== 'bigint') throw new Error('parquet expected bigint value, got ' + value)
    writer.appendInt64(value)
  }
}

/**
 * Write each value with writer.appendFloat32.
 *
 * @param {Writer} writer
 * @param {DecodedArray} values
 * @throws {Error} if any value is not a number
 */
function writePlainFloat(writer, values) {
  for (const value of values) {
    if (typeof value !== 'number') throw new Error('parquet expected number value, got ' + value)
    writer.appendFloat32(value)
  }
}

/**
 * Write each value with writer.appendFloat64.
 *
 * @param {Writer} writer
 * @param {DecodedArray} values
 * @throws {Error} if any value is not a number
 */
function writePlainDouble(writer, values) {
  for (const value of values) {
    if (typeof value !== 'number') throw new Error('parquet expected number value, got ' + value)
    writer.appendFloat64(value)
  }
}

/**
 * Write variable-length byte arrays: for each value, its byte length via
 * writer.appendUint32 followed by the bytes themselves. Strings are accepted
 * and encoded as UTF-8 first.
 *
 * @param {Writer} writer
 * @param {DecodedArray} values
 * @throws {Error} if any value is neither a string nor a Uint8Array
 */
function writePlainByteArray(writer, values) {
  // Created on the first string and reused for the rest of the array, so a
  // column of strings does not allocate one encoder per value.
  /** @type {TextEncoder | undefined} */
  let encoder
  for (const value of values) {
    let bytes = value
    if (typeof value === 'string') {
      if (!encoder) encoder = new TextEncoder()
      bytes = encoder.encode(value)
    }
    if (!(bytes instanceof Uint8Array)) {
      throw new Error('parquet expected Uint8Array value, got ' + typeof bytes)
    }
    writer.appendUint32(bytes.length)
    writer.appendBytes(bytes)
  }
}

/**
 * Write fixed-length byte arrays back to back with no length prefix.
 * Unlike the variable-length writer, strings are not accepted here.
 *
 * @param {Writer} writer
 * @param {DecodedArray} values
 * @param {number} fixedLength required byte length of every value
 * @throws {Error} if any value is not a Uint8Array of exactly fixedLength bytes
 */
function writePlainByteArrayFixed(writer, values, fixedLength) {
  for (const value of values) {
    if (!(value instanceof Uint8Array)) throw new Error('parquet expected Uint8Array value, got ' + typeof value)
    if (value.length !== fixedLength) throw new Error(`parquet expected Uint8Array of length ${fixedLength}`)
    writer.appendBytes(value)
  }
}