UNPKG

byte-decoder

Version:

Decode data. Browser or NodeJS.

441 lines (363 loc) 11.6 kB
import LRUCache from '@neumatter/lru-cache'
import ByteView from 'byteview'

/**
 * @typedef {{
 *   utf8: (bytes: Uint8Array) => string,
 *   base64: (bytes: Uint8Array) => string,
 *   base64url: (bytes: Uint8Array) => string,
 *   ascii: (bytes: Uint8Array) => string,
 *   binary: (bytes: Uint8Array) => string,
 *   utf16le: (bytes: Uint8Array) => string,
 *   utf16be: (bytes: Uint8Array) => string,
 *   hex: (bytes: Uint8Array) => string,
 *   base32: (bytes: Uint8Array, usePadding: boolean) => string,
 *   base32hex: (bytes: Uint8Array, usePadding: boolean) => string,
 *   base32crockford: (bytes: Uint8Array, usePadding: boolean) => string
 * }} DecodeFunctionMap
 */

/** @typedef {'utf8' | 'base64' | 'base64url' | 'ascii' | 'binary' | 'utf16le' | 'utf16be' | 'hex' | 'base32' | 'base32hex' | 'base32crockford'} Encoding */

// Output alphabets, indexed by the 6-bit (base64) or 5-bit (base32) group value.
const POOL_64 = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'
const URL_POOL_64 = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_'
const RFC4648_32 = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'
const RFC4648_HEX_32 = '0123456789ABCDEFGHIJKLMNOPQRSTUV'
const CROCKFORD_32 = '0123456789ABCDEFGHJKMNPQRSTVWXYZ'

// HEX_POOL[b] is the two-character lowercase hex spelling of byte value b.
const HEX_POOL = Array.from(
  { length: 256 },
  (_, byte) => byte.toString(16).padStart(2, '0')
)

// Upper bound on the number of arguments handed to String.fromCharCode per call.
const CHAR_CODE_CHUNK = 0x1000

// Code unit emitted for every byte that does not start a well-formed UTF-8 sequence.
const REPLACEMENT_CHAR = 0xfffd

/**
 * Spells `bytes` in a 32-character alphabet, 5 bits per output character.
 *
 * @param {ArrayLike<number>} bytes byte values to spell
 * @param {string} alphabet 32-character lookup string
 * @param {boolean} [usePadding=true] append '=' until the length is a multiple of 8
 * @returns {string}
 */
function toBase32 (bytes, alphabet, usePadding = true) {
  const { length } = bytes
  let pending = 0 // number of low bits of `buffer` not yet emitted
  let buffer = 0
  let response = ''

  for (let i = 0; i < length; ++i) {
    buffer = (buffer << 8) | bytes[i]
    pending += 8

    while (pending >= 5) {
      pending -= 5
      response += alphabet[(buffer >>> pending) & 0x1f]
    }

    // Drop bits that were already emitted so the accumulator never grows.
    buffer &= (1 << pending) - 1
  }

  if (pending > 0) {
    // Left-align the leftover bits inside a final 5-bit group.
    response += alphabet[(buffer << (5 - pending)) & 0x1f]
  }

  if (usePadding) {
    while (response.length % 8 !== 0) {
      response += '='
    }
  }

  return response
}

/**
 * Spells `bytes` in a 64-character alphabet, 6 bits per output character.
 *
 * @param {ArrayLike<number>} bytes byte values to spell
 * @param {string} alphabet 64-character lookup string
 * @param {boolean} usePadding append '=' so the length is a multiple of 4
 * @returns {string}
 */
function toBase64 (bytes, alphabet, usePadding) {
  const { length } = bytes
  const extraBytes = length % 3
  const chunksLength = length - extraBytes
  let response = ''

  // Whole 3-byte groups map to exactly four characters.
  for (let i = 0; i < chunksLength; i += 3) {
    const triple = (
      ((bytes[i] << 16) & 0xff0000) |
      ((bytes[i + 1] << 8) & 0xff00) |
      (bytes[i + 2] & 0xff)
    )
    response += (
      alphabet[(triple >> 18) & 0x3f] +
      alphabet[(triple >> 12) & 0x3f] +
      alphabet[(triple >> 6) & 0x3f] +
      alphabet[triple & 0x3f]
    )
  }

  if (extraBytes === 1) {
    const last = bytes[chunksLength]
    response += alphabet[(last >>> 2) & 0x3f] + alphabet[(last << 4) & 0x3f]
    if (usePadding) {
      response += '=='
    }
  } else if (extraBytes === 2) {
    const pair = (bytes[chunksLength] << 8) | bytes[chunksLength + 1]
    response += (
      alphabet[(pair >>> 10) & 0x3f] +
      alphabet[(pair >>> 4) & 0x3f] +
      alphabet[(pair << 2) & 0x3f]
    )
    if (usePadding) {
      response += '='
    }
  }

  return response
}

/**
 * Maps every byte to the character with the same code after clearing bit 7.
 *
 * @param {ArrayLike<number>} bytes
 * @returns {string}
 */
function toAscii (bytes) {
  const { length } = bytes
  let response = ''
  for (let i = 0; i < length; ++i) {
    response += String.fromCharCode(bytes[i] & 0x7f)
  }
  return response
}

/**
 * Maps every byte to the character with the same code.
 *
 * @param {ArrayLike<number>} bytes
 * @returns {string}
 */
function toBinary (bytes) {
  const { length } = bytes
  let response = ''
  for (let i = 0; i < length; ++i) {
    response += String.fromCharCode(bytes[i])
  }
  return response
}

/**
 * Combines consecutive byte pairs into UTF-16 code units. A trailing unpaired
 * byte is ignored for both byte orders.
 *
 * @param {ArrayLike<number>} bytes
 * @param {boolean} littleEndian true when the low byte comes first
 * @returns {string}
 */
function toUtf16 (bytes, littleEndian) {
  const pairedLength = bytes.length - (bytes.length % 2)
  let response = ''
  for (let i = 0; i < pairedLength; i += 2) {
    const unit = littleEndian
      ? bytes[i] | (bytes[i + 1] << 8)
      : (bytes[i] << 8) | bytes[i + 1]
    response += String.fromCharCode(unit)
  }
  return response
}

/**
 * Spells every byte as two lowercase hexadecimal digits.
 *
 * @param {ArrayLike<number>} bytes
 * @param {string[]} table 256-entry lookup from byte value to its two digits
 * @returns {string}
 */
function toHex (bytes, table) {
  const { length } = bytes
  let response = ''
  for (let i = 0; i < length; ++i) {
    response += table[bytes[i]]
  }
  return response
}

/**
 * Builds a string from the code units produced by `decodeUtf8`.
 *
 * @param {ArrayLike<number>} bytes UTF-8 encoded bytes
 * @returns {string}
 */
function toUtf8 (bytes) {
  const codeUnits = Array.from(decodeUtf8(bytes))
  const total = codeUnits.length

  if (total <= CHAR_CODE_CHUNK) {
    return String.fromCharCode.apply(String, codeUnits)
  }

  // Feed fromCharCode in slices so the argument list stays bounded.
  let response = ''
  for (let start = 0; start < total; start += CHAR_CODE_CHUNK) {
    response += String.fromCharCode.apply(
      String,
      codeUnits.slice(start, start + CHAR_CODE_CHUNK)
    )
  }
  return response
}

/**
 * Turns bytes into a string using the encoding chosen at construction time.
 * Results are memoised per instance in a cache created with `maxSize: 500`.
 */
export default class ByteDecoder {
  #cache = new LRUCache({ maxSize: 500 })
  #encoding = 'utf8'
  #lookupTable = null
  #decodeInternal = toUtf8

  /**
   * @param {Encoding | string} [encoding='utf8'] Encoding name or one of the
   *   accepted aliases ('utf-8', 'latin1', 'ucs2', 'ucs-2', 'utf-16le',
   *   'utf-16be'). Any unrecognised name leaves the instance on 'utf8'.
   */
  constructor (encoding = 'utf8') {
    switch (encoding) {
      case 'utf8':
      case 'utf-8': {
        break
      }

      case 'base32': {
        this.#encoding = 'base32'
        this.#lookupTable = RFC4648_32
        this.#decodeInternal = (bytes, usePadding = true) =>
          toBase32(bytes, this.#lookupTable, usePadding)
        break
      }

      case 'base32hex': {
        this.#encoding = 'base32hex'
        this.#lookupTable = RFC4648_HEX_32
        this.#decodeInternal = (bytes, usePadding = true) =>
          toBase32(bytes, this.#lookupTable, usePadding)
        break
      }

      case 'base32crockford': {
        this.#encoding = 'base32crockford'
        this.#lookupTable = CROCKFORD_32
        this.#decodeInternal = (bytes, usePadding = true) =>
          toBase32(bytes, this.#lookupTable, usePadding)
        break
      }

      case 'base64': {
        this.#encoding = 'base64'
        this.#lookupTable = POOL_64
        this.#decodeInternal = (bytes) =>
          toBase64(bytes, this.#lookupTable, true)
        break
      }

      case 'base64url': {
        this.#encoding = 'base64url'
        this.#lookupTable = URL_POOL_64
        // The URL-safe variant is emitted without '=' padding.
        this.#decodeInternal = (bytes) =>
          toBase64(bytes, this.#lookupTable, false)
        break
      }

      case 'ascii': {
        this.#encoding = 'ascii'
        this.#decodeInternal = toAscii
        break
      }

      case 'latin1':
      case 'binary': {
        this.#encoding = 'binary'
        this.#decodeInternal = toBinary
        break
      }

      case 'ucs2':
      case 'ucs-2':
      case 'utf16le':
      case 'utf-16le': {
        this.#encoding = 'utf16le'
        this.#decodeInternal = (bytes) => toUtf16(bytes, true)
        break
      }

      case 'utf16be':
      case 'utf-16be': {
        this.#encoding = 'utf16be'
        this.#decodeInternal = (bytes) => toUtf16(bytes, false)
        break
      }

      case 'hex': {
        this.#encoding = 'hex'
        this.#lookupTable = HEX_POOL
        this.#decodeInternal = (bytes) => toHex(bytes, this.#lookupTable)
        break
      }

      default:
        break
    }
  }

  /**
   * Canonical name of the active encoding (aliases are normalised).
   *
   * @returns {string}
   */
  get encoding () {
    return this.#encoding
  }

  /**
   * Converts `data` to a string in this instance's encoding.
   *
   * Input that is not already a ByteView is passed through `ByteView.from`.
   * The cache key is the comma-joined list of byte values, so repeated calls
   * with equal bytes return the cached string.
   *
   * NOTE(review): no padding flag is forwarded here, so the base32 variants
   * always run with their default `usePadding = true`.
   *
   * @param {*} data a ByteView, or anything `ByteView.from` accepts
   * @returns {string}
   */
  decode (data) {
    const view = ByteView.isByteView(data) ? data : ByteView.from(data)
    const key = [...view].toString()

    let response = this.#cache.get(key)
    if (response === undefined) {
      response = this.#decodeInternal(view)
      this.#cache.set(key, response)
    }

    return response
  }
}

/**
 * Walks UTF-8 `bytes` and yields UTF-16 code units.
 *
 * The sequence length is chosen from the lead byte alone. A sequence is
 * accepted only when all its continuation bytes are present and have the form
 * 10xxxxxx, and the decoded value is not overlong, not in the surrogate range
 * and not above U+10FFFF. Otherwise U+FFFD is yielded and decoding resumes
 * one byte later. Code points above U+FFFF are yielded as a surrogate pair.
 *
 * @param {ArrayLike<number>} bytes
 * @returns {Generator<number, void, unknown>}
 */
function * decodeUtf8 (bytes) {
  const { length } = bytes
  let index = 0

  while (index < length) {
    const lead = bytes[index]

    if (lead < 0x80) {
      yield lead
      index += 1
      continue
    }

    let trailing = 0 // continuation bytes the lead byte announces
    let minimum = 0 // smallest code point that legitimately needs this length
    let codePoint = 0

    if (lead >= 0xc0 && lead <= 0xdf) {
      trailing = 1
      minimum = 0x80
      codePoint = lead & 0x1f
    } else if (lead >= 0xe0 && lead <= 0xef) {
      trailing = 2
      minimum = 0x800
      codePoint = lead & 0x0f
    } else if (lead >= 0xf0 && lead <= 0xf7) {
      trailing = 3
      minimum = 0x10000
      codePoint = lead & 0x07
    }
    // Any other value (a stray continuation byte or 0xf8 and above) cannot
    // start a sequence; `trailing` stays 0 and the byte is replaced below.

    let wellFormed = trailing > 0 && index + trailing < length

    for (let offset = 1; wellFormed && offset <= trailing; ++offset) {
      const next = bytes[index + offset]
      if ((next & 0xc0) === 0x80) {
        codePoint = (codePoint << 6) | (next & 0x3f)
      } else {
        wellFormed = false
      }
    }

    if (
      wellFormed &&
      (codePoint < minimum ||
        codePoint > 0x10ffff ||
        (codePoint >= 0xd800 && codePoint <= 0xdfff))
    ) {
      wellFormed = false
    }

    if (!wellFormed) {
      // No valid code point starts here: emit U+FFFD and advance one byte only.
      yield REPLACEMENT_CHAR
      index += 1
      continue
    }

    index += trailing + 1

    if (codePoint > 0xffff) {
      // Encode as a UTF-16 surrogate pair.
      const offsetPoint = codePoint - 0x10000
      yield 0xd800 | ((offsetPoint >>> 10) & 0x3ff)
      yield 0xdc00 | (offsetPoint & 0x3ff)
    } else {
      yield codePoint
    }
  }
}