UNPKG

@cto.af/textdecoder

Version:

Quick wrapper that finds TextDecoder in your runtime, or uses an adequate polyfill

237 lines (222 loc) 5.55 kB
'use strict';

// Emitted in place of any byte sequence that is not valid UTF-8.
const REPLACEMENT = '\ufffd';

// Largest value String.fromCodePoint() accepts; anything above it throws.
const MAX_CODE_POINT = 0x10ffff;

// Classification table indexed by the top five bits of a byte (`b >> 3`).
//    0: single-byte (ASCII) character
//   -1: continuation byte
//  1-3: lead byte, value is the number of continuation bytes that follow
//   -2: byte that can never appear in UTF-8
const BYTES = [
  // 0b00_000 - 0b01_111: ASCII
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  // 0b10_000 - 0b10_111: Continuation
  -1, // 16
  -1, -1, -1, -1, -1, -1, -1,
  // 0b110_00 - 0b110_11: Two bytes
  1, // 24
  1, 1, 1,
  // 0b1110_0 - 0b1110_1: Three bytes
  2, // 28
  2,
  // 0b11110: Four bytes
  3, // 30
  // 0b11111: Invalid
  -2, // 31
];

const ERR_MSG = '[ERR_ENCODING_INVALID_ENCODED_DATA]: ' +
  'The encoded data was not valid for encoding utf-8';

/**
 * Build the TypeError thrown for invalid UTF-8 input.
 *
 * @param {number} errno Value stored on the error's `errno` property.
 * @returns {TypeError} Error carrying `code` and `errno`.
 */
function invalidDataError(errno) {
  const err = new TypeError(ERR_MSG);
  err.code = 'ERR_ENCODING_INVALID_ENCODED_DATA';
  err.errno = errno;
  return err;
}

/**
 * Abandon the sequence currently being assembled and report it as invalid.
 * The state is always cleared first so that a rejected sequence cannot leak
 * into the handling of the bytes that follow it.
 *
 * @param {{cur: number, left: number, total: number}} state Decoder state,
 *   reset in place.
 * @param {boolean} fatal Throw instead of substituting U+FFFD.
 * @returns {string} The replacement character (non-fatal mode only).
 * @throws {TypeError} In fatal mode.
 */
function reject(state, fatal) {
  state.cur = 0;
  state.left = 0;
  state.total = 0;
  if (fatal) {
    throw invalidDataError(12);
  }
  return REPLACEMENT;
}

/**
 * Detect overlong encodings: a code point that was encoded with more bytes
 * than it needs. `state.total` is the number of continuation bytes announced
 * by the lead byte.
 *
 * @param {{cur: number, total: number}} state Completed sequence.
 * @returns {boolean} True when the sequence length does not fit the value.
 */
function badLength(state) {
  if (state.cur < 0x80) {
    // ASCII never reaches here legitimately; it is decoded from one byte.
    return true;
  }
  if (state.cur < 0x800) {
    return state.total !== 1;
  }
  if (state.cur < 0x010000) {
    return state.total !== 2;
  }
  return false;
}

/**
 * @param {{cur: number}} state Completed sequence.
 * @returns {boolean} True when the value lies in the UTF-16 surrogate range.
 */
function surrogate(state) {
  return (state.cur >= 0xd800) && (state.cur <= 0xdfff);
}

/**
 * @param {{cur: number}} state Completed sequence.
 * @returns {boolean} True when the value is beyond the last Unicode code
 *   point and therefore cannot be passed to String.fromCodePoint().
 */
function tooLarge(state) {
  return state.cur > MAX_CODE_POINT;
}

/**
 * Decode a chunk of UTF-8 bytes.
 *
 * @param {Iterable<number>} buf Byte values to decode.
 * @param {boolean} fatal Throw on invalid data instead of emitting U+FFFD.
 * @param {{cur: number, left: number, total: number}} [state] State carried
 *   over from a previous chunk; mutated in place. A fresh state is created
 *   when omitted.
 * @returns {Array} Two-element array: the decoded string and the state to
 *   pass to the next call. `state.left !== 0` means the chunk ended in the
 *   middle of a multi-byte sequence.
 * @throws {TypeError} In fatal mode, when invalid data is encountered.
 */
function utf8Decode(buf, fatal, state) {
  if (!state) {
    state = {cur: 0, left: 0, total: 0};
  }
  let res = '';
  for (const b of buf) {
    const bytes = BYTES[b >> 3];
    switch (bytes) {
      case -2:
        // Top 5 bits all set
        res += reject(state, fatal);
        break;
      case -1:
        state.left--;
        if (state.left < 0) {
          // Too many continuation bytes
          res += reject(state, fatal);
        } else {
          state.cur = (state.cur << 6) | (b & 0x3f);
          if (state.left === 0) {
            if (badLength(state) || surrogate(state) || tooLarge(state)) {
              res += reject(state, fatal);
            } else {
              res += String.fromCodePoint(state.cur);
              state.cur = 0;
            }
          }
        }
        break;
      case 0:
        // One ASCII7 byte
        if ((state.cur !== 0) || (state.left !== 0)) {
          // Not enough continuation bytes
          res += reject(state, fatal);
        }
        res += String.fromCharCode(b);
        break;
      default:
        if ((state.cur !== 0) || (state.left !== 0)) {
          // Not enough continuation bytes
          res += reject(state, fatal);
        }
        state.total = bytes;
        state.left = bytes;
        // Keep only the payload bits of the lead byte.
        state.cur = b & (0xff >> (bytes + 2));
        break;
    }
  }
  return [res, state];
}

/**
 * UTF-8-only stand-in for the TextDecoder class.
 */
class TextDecoderPolyfill {
  /**
   * @param {string} [utfLabel='utf-8'] Encoding label; only "utf-8" and
   *   "utf8" (case-insensitive) are accepted.
   * @param {object} [options]
   * @param {boolean} [options.fatal=false] Throw on invalid input.
   * @param {boolean} [options.ignoreBOM=false] Keep a leading U+FEFF in the
   *   output instead of removing it.
   * @throws {RangeError} With code ERR_ENCODING_NOT_SUPPORTED for any other
   *   label.
   */
  constructor(utfLabel, options) {
    this.encoding = (utfLabel || 'utf-8').toLowerCase();
    if ((this.encoding !== 'utf-8') && (this.encoding !== 'utf8')) {
      const err = new RangeError('The "' + utfLabel + '" encoding is not supported');
      err.code = 'ERR_ENCODING_NOT_SUPPORTED';
      throw err;
    }
    options = options || {};
    this.fatal = Boolean(options.fatal);
    this.ignoreBOM = Boolean(options.ignoreBOM);

    // Partial-sequence state carried between streaming decode() calls.
    Object.defineProperty(this, 'state', {
      value: null,
      configurable: false,
      enumerable: false,
      writable: true,
    });
    // Whether the first character of the current stream has already been
    // examined for a byte order mark.
    Object.defineProperty(this, 'bomSeen', {
      value: false,
      configurable: false,
      enumerable: false,
      writable: true,
    });
  }

  /**
   * Decode bytes into a string.
   *
   * @param {ArrayBuffer|ArrayBufferView} input Bytes to decode.
   * @param {object} [options]
   * @param {boolean} [options.stream=false] More chunks will follow; an
   *   incomplete trailing sequence is held back for the next call.
   * @returns {string} Decoded text.
   * @throws {TypeError} With code ERR_INVALID_ARG_TYPE when `input` is not
   *   an ArrayBuffer or a view, or with code
   *   ERR_ENCODING_INVALID_ENCODED_DATA in fatal mode on invalid data.
   */
  decode(input, options) {
    if (!(input instanceof Uint8Array)) {
      if (input instanceof ArrayBuffer) {
        input = new Uint8Array(input);
      } else if (ArrayBuffer.isView(input)) {
        input = new Uint8Array(
          input.buffer,
          input.byteOffset,
          input.byteLength
        );
      } else {
        const typ = typeof input;
        const err = new TypeError('The "input" argument must be an instance of ArrayBuffer or ArrayBufferView. Received type ' + typ);
        err.code = 'ERR_INVALID_ARG_TYPE';
        throw err;
      }
    }

    const streaming = Boolean(options && options.stream);

    let decoded = null;
    try {
      decoded = utf8Decode(input, this.fatal, this.state);
    } catch (err) {
      // Start from a clean slate if the caller reuses this decoder.
      this.state = null;
      this.bomSeen = false;
      throw err;
    }
    let str = decoded[0];
    const state = decoded[1];

    if (streaming) {
      this.state = state;
    } else {
      this.state = null;
      if (state.left !== 0) {
        // Truncated
        if (this.fatal) {
          this.bomSeen = false;
          throw invalidDataError(11);
        }
        str += REPLACEMENT;
      }
    }

    // Only the very first character of a stream can be a byte order mark;
    // a U+FEFF that merely happens to start a later chunk is real content.
    if (!this.ignoreBOM && !this.bomSeen && (str.length > 0)) {
      this.bomSeen = true;
      // U+FEFF: BYTE ORDER MARK
      if (str.codePointAt(0) === 0xFEFF) {
        str = str.slice(1);
      }
    }
    if (!streaming) {
      // The stream is finished; the next call begins a new one.
      this.bomSeen = false;
    }
    return str;
  }
}

// CommonJS export. Guarded so that evaluating this file where `module` does
// not exist fails gracefully instead of throwing a ReferenceError.
if ((typeof module !== 'undefined') && module.exports) {
  module.exports = TextDecoderPolyfill;
}