@severo_tests/hyparquet
Version:
Parquet file parser for JavaScript
128 lines (119 loc) • 3.63 kB
JavaScript
/**
* The MIT License (MIT)
* Copyright (c) 2016 Zhipeng Jia
* https://github.com/zhipeng-jia/snappyjs
*/
/**
 * Bit masks indexed by byte count: entry N keeps the lowest N bytes of a
 * 32-bit word (entry 0 keeps nothing, entry 4 keeps all four bytes).
 */
const WORD_MASK = [
  0x00000000,
  0x000000ff,
  0x0000ffff,
  0x00ffffff,
  0xffffffff,
]
/**
 * Byte-by-byte forward copy from one array into another.
 *
 * Bytes are moved one at a time in ascending index order. When both arguments
 * are the same array and the destination starts after the source, a byte
 * written earlier in the call can therefore be read again later in the call.
 *
 * @param {Uint8Array} fromArray array to read from
 * @param {number} fromPos index of the first byte to read
 * @param {Uint8Array} toArray array to write to
 * @param {number} toPos index of the first byte to write
 * @param {number} length how many bytes to copy
 */
function copyBytes(fromArray, fromPos, toArray, toPos, length) {
  let src = fromPos
  let dst = toPos
  let remaining = length
  while (remaining > 0) {
    toArray[dst] = fromArray[src]
    src++
    dst++
    remaining--
  }
}
/**
 * Decompress snappy data.
 * Accepts an output buffer to avoid allocating a new buffer for each call.
 *
 * The leading varint (the uncompressed length) is skipped, not validated;
 * instead the number of bytes produced must match `output.byteLength` exactly.
 *
 * @param {Uint8Array} input compressed data
 * @param {Uint8Array} output output buffer, sized to the uncompressed length
 * @returns {void}
 * @throws {Error} if the input is truncated or malformed, or if the decoded
 *   size differs from the size of the output buffer
 */
export function snappyUncompress(input, output) {
  const inputLength = input.byteLength
  const outputLength = output.byteLength
  let pos = 0
  let outPos = 0

  // skip preamble (contains uncompressed length as varint)
  while (pos < inputLength) {
    const c = input[pos]
    pos++
    if (c < 128) {
      break
    }
  }
  if (outputLength && pos >= inputLength) {
    throw new Error('invalid snappy length header')
  }

  while (pos < inputLength) {
    const c = input[pos]
    let len = 0
    pos++

    // Every element needs at least one more byte after its tag byte
    if (pos >= inputLength) {
      throw new Error('missing eof marker')
    }

    // There are two types of elements, literals and copies (back references)
    if ((c & 0x3) === 0) {
      // Literals are uncompressed data stored directly in the byte stream
      len = (c >>> 2) + 1
      // Longer literal length is encoded in 1-4 extra bytes after the tag
      if (len > 60) {
        const lengthSize = len - 60 // number of extra length bytes
        // Only the bytes that actually hold the length must be present
        if (pos + lengthSize > inputLength) {
          throw new Error('snappy error literal length exceeds input length')
        }
        // Always combine four bytes; the mask drops those beyond lengthSize
        // (bytes past the end of input read as undefined and shift to 0)
        len = input[pos]
          + (input[pos + 1] << 8)
          + (input[pos + 2] << 16)
          + (input[pos + 3] << 24)
        // Bitwise ops yield signed 32-bit values; >>> 0 reinterprets the
        // result as unsigned so a set top bit cannot make the length negative
        len = ((len & WORD_MASK[lengthSize]) >>> 0) + 1
        pos += lengthSize
      }
      if (pos + len > inputLength) {
        throw new Error('snappy error literal exceeds input length')
      }
      copyBytes(input, pos, output, outPos, len)
      pos += len
      outPos += len
    } else {
      // Copy elements
      let offset = 0 // offset back from current position to read
      switch (c & 0x3) {
        case 1:
          // Copy with 1-byte offset
          len = (c >>> 2 & 0x7) + 4
          offset = input[pos] + (c >>> 5 << 8)
          pos++
          break
        case 2:
          // Copy with 2-byte offset
          if (inputLength <= pos + 1) {
            throw new Error('snappy error end of input')
          }
          len = (c >>> 2) + 1
          offset = input[pos] + (input[pos + 1] << 8)
          pos += 2
          break
        case 3:
          // Copy with 4-byte offset
          if (inputLength <= pos + 3) {
            throw new Error('snappy error end of input')
          }
          len = (c >>> 2) + 1
          // >>> 0 keeps the offset unsigned; without it a top byte >= 0x80
          // gives a negative offset that slips past the range check below
          offset = (input[pos]
            + (input[pos + 1] << 8)
            + (input[pos + 2] << 16)
            + (input[pos + 3] << 24)) >>> 0
          pos += 4
          break
      }
      if (offset === 0 || Number.isNaN(offset)) {
        throw new Error(`invalid offset ${offset} pos ${pos} inputLength ${inputLength}`)
      }
      if (offset > outPos) {
        throw new Error('cannot copy from before start of buffer')
      }
      // Source and destination may overlap; copyBytes goes byte by byte
      copyBytes(output, outPos - offset, output, outPos, len)
      outPos += len
    }
  }

  if (outPos !== outputLength) throw new Error('premature end of input')
}