byte-decoder
Version:
Decode binary data into strings. Works in the browser and in Node.js.
441 lines (363 loc) • 11.6 kB
JavaScript
import LRUCache from '@neumatter/lru-cache'
import ByteView from 'byteview'
/** @typedef {{ utf8: (bytes: Uint8Array) => string, base64: (bytes: Uint8Array) => string, base64url: (bytes: Uint8Array) => string, ascii: (bytes: Uint8Array) => string, binary: (bytes: Uint8Array) => string, utf16le: (bytes: Uint8Array) => string, utf16be: (bytes: Uint8Array) => string, hex: (bytes: Uint8Array) => string, base32: (bytes: Uint8Array, usePadding: boolean) => string, base32hex: (bytes: Uint8Array, usePadding: boolean) => string, base32crockford: (bytes: Uint8Array, usePadding: boolean) => string }} DecodeFunctionMap */
/** @typedef {'utf8' | 'base64' | 'base64url' | 'ascii' | 'binary' | 'utf16le' | 'utf16be' | 'hex' | 'base32' | 'base32hex' | 'base32crockford'} Encoding */
// Output alphabets for the text encodings below. In each string the character
// at position `n` is the symbol emitted for the numeric group value `n`.

// Base64 alphabet (A-Z, a-z, 0-9, '+', '/'): 64 symbols, one per 6-bit group.
const POOL_64 = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'
// URL-safe Base64 alphabet: same as POOL_64 but ending in '-' and '_'.
const URL_POOL_64 = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_'
// Base32 alphabet (A-Z, 2-7): 32 symbols, one per 5-bit group.
const RFC4648_32 = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'
// "Extended hex" Base32 alphabet (0-9, A-V).
const RFC4648_HEX_32 = '0123456789ABCDEFGHIJKLMNOPQRSTUV'
// Crockford Base32 alphabet (0-9, A-Z without I, L, O and U).
const CROCKFORD_32 = '0123456789ABCDEFGHJKMNPQRSTVWXYZ'
// Lookup table mapping every byte value (0-255) to its two-character,
// lowercase hexadecimal representation, e.g. HEX_POOL[171] === 'ab'.
const HEX_POOL = (() => {
  const digits = '0123456789abcdef'
  // High nibble selects the first digit, low nibble the second.
  return Array.from(
    { length: 256 },
    (_, byte) => digits[byte >> 4] + digits[byte & 0x0f]
  )
})()
/**
 * Turns raw bytes into a string using one text encoding that is fixed when
 * the instance is constructed.
 *
 * Supported encoding names: 'utf8'/'utf-8', 'base32', 'base32hex',
 * 'base32crockford', 'base64', 'base64url', 'ascii', 'latin1'/'binary',
 * 'ucs2'/'ucs-2'/'utf16le'/'utf-16le', 'utf16be'/'utf-16be' and 'hex'.
 * Any other name leaves the instance configured as a UTF-8 decoder.
 *
 * Results are memoized per instance in an LRU cache created with
 * `maxSize: 500`, keyed by the byte content of the input.
 */
export default class ByteDecoder {
  #cache = new LRUCache({
    maxSize: 500
  })

  /** Canonical name of the active encoding (aliases are normalized). */
  #encoding = 'utf8'

  /** Alphabet / table used by the base32, base64 and hex decoders. */
  #lookupTable = null

  /**
   * Active decoding routine. The default is UTF-8; the constructor replaces
   * it for every other encoding. The second argument is the padding flag,
   * accepted for a uniform call signature and ignored by UTF-8.
   */
  #decodeInternal = (bytes, _usePadding) => {
    const codeUnits = []
    let count = 0
    for (const unit of decodeUtf8(bytes)) {
      codeUnits[count++] = unit
    }
    // Small results fit in a single apply() call.
    if (count <= 0x1000) {
      return String.fromCharCode.apply(String, codeUnits)
    }
    // Larger results are converted in 0x1000-unit slices so the argument
    // list handed to apply() stays bounded.
    let response = ''
    let index = 0
    while (index < count) {
      response += String.fromCharCode.apply(
        String,
        codeUnits.slice(index, index += 0x1000)
      )
    }
    return response
  }

  /**
   * Renders bytes as base32 text by emitting one alphabet symbol per 5-bit
   * group, most significant bits first.
   *
   * @param {Uint8Array} bytes
   * @param {string} alphabet 32-symbol alphabet
   * @param {boolean} usePadding append '=' until the length is a multiple of 8
   * @returns {string}
   */
  static #toBase32 (bytes, alphabet, usePadding) {
    const { length } = bytes
    let bits = 0
    let value = 0
    let response = ''
    for (let index = 0; index < length; ++index) {
      // Only the low `bits` bits of `value` are ever read, so 32-bit
      // wrap-around of the accumulator is harmless.
      value = (value << 8) | bytes[index]
      bits += 8
      while (bits >= 5) {
        response += alphabet[(value >>> (bits - 5)) & 0x1F]
        bits -= 5
      }
    }
    if (bits > 0) {
      // Left-align the remaining bits inside a final 5-bit group.
      response += alphabet[(value << (5 - bits)) & 0x1F]
    }
    if (usePadding) {
      while (response.length % 8 !== 0) {
        response += '='
      }
    }
    return response
  }

  /**
   * Renders bytes as base64 text: four symbols per full 3-byte group, plus
   * two or three symbols for a trailing 1- or 2-byte remainder.
   *
   * @param {Uint8Array} bytes
   * @param {string} alphabet 64-symbol alphabet
   * @param {boolean} usePadding append '=' so the length is a multiple of 4
   * @returns {string}
   */
  static #toBase64 (bytes, alphabet, usePadding) {
    const { length } = bytes
    const extraBytes = length % 3
    const chunksLength = length - extraBytes
    let response = ''
    let index = 0
    for (; index < chunksLength; index += 3) {
      const chunk = (
        ((bytes[index] << 16) & 0xFF0000) +
        ((bytes[index + 1] << 8) & 0xFF00) +
        (bytes[index + 2] & 0xFF)
      )
      response += (
        alphabet[(chunk >> 18) & 0x3F] +
        alphabet[(chunk >> 12) & 0x3F] +
        alphabet[(chunk >> 6) & 0x3F] +
        alphabet[chunk & 0x3F]
      )
    }
    if (extraBytes === 1) {
      const tail = bytes[index] & 0xFF
      response += alphabet[tail >> 2] + alphabet[(tail << 4) & 0x3F]
      if (usePadding) {
        response += '=='
      }
    } else if (extraBytes === 2) {
      const tail = ((bytes[index] & 0xFF) << 8) | (bytes[index + 1] & 0xFF)
      response += (
        alphabet[tail >> 10] +
        alphabet[(tail >> 4) & 0x3F] +
        alphabet[(tail << 2) & 0x3F]
      )
      if (usePadding) {
        response += '='
      }
    }
    return response
  }

  /**
   * @param {string} [encoding='utf8'] encoding name or alias; unrecognized
   *   names fall back to UTF-8.
   */
  constructor (encoding = 'utf8') {
    switch (encoding) {
      case 'utf8':
      case 'utf-8': {
        break
      }
      case 'base32': {
        this.#encoding = 'base32'
        this.#lookupTable = RFC4648_32
        this.#decodeInternal = (bytes, usePadding = true) =>
          ByteDecoder.#toBase32(bytes, this.#lookupTable, usePadding)
        break
      }
      case 'base32hex': {
        this.#encoding = 'base32hex'
        this.#lookupTable = RFC4648_HEX_32
        this.#decodeInternal = (bytes, usePadding = true) =>
          ByteDecoder.#toBase32(bytes, this.#lookupTable, usePadding)
        break
      }
      case 'base32crockford': {
        this.#encoding = 'base32crockford'
        this.#lookupTable = CROCKFORD_32
        this.#decodeInternal = (bytes, usePadding = true) =>
          ByteDecoder.#toBase32(bytes, this.#lookupTable, usePadding)
        break
      }
      case 'base64': {
        this.#encoding = 'base64'
        this.#lookupTable = POOL_64
        // Standard base64 is always padded.
        this.#decodeInternal = (bytes) =>
          ByteDecoder.#toBase64(bytes, this.#lookupTable, true)
        break
      }
      case 'base64url': {
        this.#encoding = 'base64url'
        this.#lookupTable = URL_POOL_64
        // The URL-safe variant is always emitted without padding.
        this.#decodeInternal = (bytes) =>
          ByteDecoder.#toBase64(bytes, this.#lookupTable, false)
        break
      }
      case 'ascii': {
        this.#encoding = 'ascii'
        this.#decodeInternal = (bytes) => {
          let response = ''
          const { length } = bytes
          for (let index = 0; index < length; ++index) {
            // Drop the high bit so every byte maps into 7-bit ASCII.
            response += String.fromCharCode(bytes[index] & 0x7F)
          }
          return response
        }
        break
      }
      case 'latin1':
      case 'binary': {
        this.#encoding = 'binary'
        this.#decodeInternal = (bytes) => {
          let response = ''
          const { length } = bytes
          for (let index = 0; index < length; ++index) {
            response += String.fromCharCode(bytes[index])
          }
          return response
        }
        break
      }
      case 'ucs2':
      case 'ucs-2':
      case 'utf16le':
      case 'utf-16le': {
        this.#encoding = 'utf16le'
        this.#decodeInternal = (bytes) => {
          let response = ''
          // Stop before a dangling odd byte: reading past the end used to
          // produce NaN and therefore a spurious '\u0000' code unit.
          const end = bytes.length - 1
          for (let index = 0; index < end; index += 2) {
            response += String.fromCharCode(bytes[index] | (bytes[index + 1] << 8))
          }
          return response
        }
        break
      }
      case 'utf16be':
      case 'utf-16be': {
        this.#encoding = 'utf16be'
        this.#decodeInternal = (bytes) => {
          let response = ''
          // A dangling odd byte at the end is ignored.
          const end = bytes.length - 1
          for (let index = 0; index < end; index += 2) {
            response += String.fromCharCode(bytes[index + 1] | (bytes[index] << 8))
          }
          return response
        }
        break
      }
      case 'hex': {
        this.#encoding = 'hex'
        this.#lookupTable = HEX_POOL
        this.#decodeInternal = (bytes) => {
          let response = ''
          const { length } = bytes
          // Every byte contributes two hex digits. The previous loop stepped
          // two bytes at a time and stopped one short, dropping data.
          for (let index = 0; index < length; ++index) {
            response += this.#lookupTable[bytes[index]]
          }
          return response
        }
        break
      }
      default:
        break
    }
  }

  /** Canonical name of the encoding this instance produces. */
  get encoding () {
    return this.#encoding
  }

  /**
   * Converts `data` to a string in this instance's encoding.
   *
   * @param {*} data a ByteView, or any value accepted by `ByteView.from`
   * @param {boolean} [usePadding=true] honoured only by the base32 family:
   *   when false the trailing '=' padding is omitted. Other encodings
   *   ignore it.
   * @returns {string}
   */
  decode (data, usePadding = true) {
    const bytes = ByteView.isByteView(data) ? data : ByteView.from(data)
    // Byte values joined by commas; the unpadded variant gets its own key so
    // padded and unpadded results never shadow each other in the cache.
    const bytesKey = [...bytes].toString()
    const key = usePadding ? bytesKey : `nopad:${bytesKey}`
    let response = this.#cache.get(key)
    if (response === undefined) {
      response = this.#decodeInternal(bytes, usePadding)
      this.#cache.set(key, response)
    }
    return response
  }
}
/**
 * Lazily decodes UTF-8 bytes into UTF-16 code units.
 *
 * Code points above U+FFFF are yielded as a surrogate pair (two values).
 * Any malformed sequence yields U+FFFD and decoding resumes at the very next
 * byte. A sequence is malformed when it is: a stray continuation byte, a lead
 * byte whose sequence is truncated by the end of input, a lead byte followed
 * by a non-continuation byte, an overlong form, an encoded surrogate, or a
 * value above U+10FFFF.
 *
 * @param {Uint8Array} bytes indexable sequence of byte values (0-255)
 * @yields {number} UTF-16 code unit
 */
function * decodeUtf8 (bytes) {
  const { length } = bytes
  let index = 0
  while (index < length) {
    const firstByte = bytes[index]
    // Sequence length announced by the lead byte.
    let needed = 1
    if (firstByte > 0xef) {
      needed = 4
    } else if (firstByte > 0xdf) {
      needed = 3
    } else if (firstByte > 0xbf) {
      needed = 2
    }
    // -1 marks "no valid code point decoded".
    let codePoint = -1
    if (index + needed <= length) {
      switch (needed) {
        case 1: {
          // 0x80-0xBF here would be a continuation byte without a lead.
          if (firstByte < 0x80) {
            codePoint = firstByte
          }
          break
        }
        case 2: {
          const second = bytes[index + 1]
          if ((second & 0xc0) === 0x80) {
            const candidate = ((firstByte & 0x1f) << 0x6) | (second & 0x3f)
            // Reject overlong encodings of ASCII.
            if (candidate > 0x7f) {
              codePoint = candidate
            }
          }
          break
        }
        case 3: {
          const second = bytes[index + 1]
          const third = bytes[index + 2]
          if ((second & 0xc0) === 0x80 && (third & 0xc0) === 0x80) {
            const candidate = (
              ((firstByte & 0xf) << 0xc) |
              ((second & 0x3f) << 0x6) |
              (third & 0x3f)
            )
            // Reject overlong forms and UTF-8-encoded surrogates.
            if (candidate > 0x7ff && (candidate < 0xd800 || candidate > 0xdfff)) {
              codePoint = candidate
            }
          }
          break
        }
        default: {
          const second = bytes[index + 1]
          const third = bytes[index + 2]
          const fourth = bytes[index + 3]
          if (
            (second & 0xc0) === 0x80 &&
            (third & 0xc0) === 0x80 &&
            (fourth & 0xc0) === 0x80
          ) {
            const candidate = (
              ((firstByte & 0xf) << 0x12) |
              ((second & 0x3f) << 0xc) |
              ((third & 0x3f) << 0x6) |
              (fourth & 0x3f)
            )
            // Reject overlong forms and anything beyond U+10FFFF; this also
            // rejects the invalid lead bytes 0xF8-0xFF.
            if (candidate > 0xffff && candidate < 0x110000) {
              codePoint = candidate
            }
          }
          break
        }
      }
    }
    if (codePoint < 0) {
      // No valid code point: emit the replacement character and advance
      // only one byte so the following bytes get their own chance.
      yield 0xfffd
      index += 1
      continue
    }
    index += needed
    if (codePoint > 0xffff) {
      // Encode as a UTF-16 surrogate pair.
      codePoint -= 0x10000
      yield ((codePoint >>> 10) & 0x3ff) | 0xd800
      yield 0xdc00 | (codePoint & 0x3ff)
    } else {
      yield codePoint
    }
  }
}