@loaders.gl/polyfills
Version:
Polyfills for TextEncoder/TextDecoder
1,749 lines (1,550 loc) • 94 kB
text/typescript
// @ts-nocheck
/* eslint-disable */
// Copied from https://github.com/inexorabletash/text-encoding/blob/b4e5bc26e26e51f56e3daa9f13138c79f49d3c34/lib/encoding.js
// This is free and unencumbered software released into the public domain.
// See LICENSE.md for more information.
import indexes from './encoding-indexes';
// Note: Aaian character indices add half a megabyte to bundle. Ignore, since we really only want the built-in UTF8...
// import indexes from './encoding-indexes-asian.js';
globalThis['encoding-indexes'] = indexes || {};
//
// Utilities
//
/**
* @param {number} a The number to test.
* @param {number} min The minimum value in the range, inclusive.
* @param {number} max The maximum value in the range, inclusive.
* @return {boolean} True if a >= min and a <= max.
*/
function inRange(a, min, max) {
return min <= a && a <= max;
}
/**
* @param {!Array.<*>} array The array to check.
* @param {*} item The item to look for in the array.
* @return {boolean} True if the item appears in the array.
*/
function includes(array, item) {
return array.indexOf(item) !== -1;
}
var floor = Math.floor;
/**
* @param {*} o
* @return {Object}
*/
function ToDictionary(o) {
if (o === undefined) return {};
if (o === Object(o)) return o;
throw TypeError('Could not convert argument to dictionary');
}
/**
* @param {string} string Input string of UTF-16 code units.
* @return {!Array.<number>} Code points.
*/
function stringToCodePoints(string) {
// https://heycam.github.io/webidl/#dfn-obtain-unicode
// 1. Let S be the DOMString value.
var s = String(string);
// 2. Let n be the length of S.
var n = s.length;
// 3. Initialize i to 0.
var i = 0;
// 4. Initialize U to be an empty sequence of Unicode characters.
var u = [];
// 5. While i < n:
while (i < n) {
// 1. Let c be the code unit in S at index i.
var c = s.charCodeAt(i);
// 2. Depending on the value of c:
// c < 0xD800 or c > 0xDFFF
if (c < 0xd800 || c > 0xdfff) {
// Append to U the Unicode character with code point c.
u.push(c);
}
// 0xDC00 ≤ c ≤ 0xDFFF
else if (0xdc00 <= c && c <= 0xdfff) {
// Append to U a U+FFFD REPLACEMENT CHARACTER.
u.push(0xfffd);
}
// 0xD800 ≤ c ≤ 0xDBFF
else if (0xd800 <= c && c <= 0xdbff) {
// 1. If i = n−1, then append to U a U+FFFD REPLACEMENT
// CHARACTER.
if (i === n - 1) {
u.push(0xfffd);
}
// 2. Otherwise, i < n−1:
else {
// 1. Let d be the code unit in S at index i+1.
var d = s.charCodeAt(i + 1);
// 2. If 0xDC00 ≤ d ≤ 0xDFFF, then:
if (0xdc00 <= d && d <= 0xdfff) {
// 1. Let a be c & 0x3FF.
var a = c & 0x3ff;
// 2. Let b be d & 0x3FF.
var b = d & 0x3ff;
// 3. Append to U the Unicode character with code point
// 2^16+2^10*a+b.
u.push(0x10000 + (a << 10) + b);
// 4. Set i to i+1.
i += 1;
}
// 3. Otherwise, d < 0xDC00 or d > 0xDFFF. Append to U a
// U+FFFD REPLACEMENT CHARACTER.
else {
u.push(0xfffd);
}
}
}
// 3. Set i to i+1.
i += 1;
}
// 6. Return U.
return u;
}
/**
* @param {!Array.<number>} code_points Array of code points.
* @return {string} string String of UTF-16 code units.
*/
function codePointsToString(code_points) {
var s = '';
for (var i = 0; i < code_points.length; ++i) {
var cp = code_points[i];
if (cp <= 0xffff) {
s += String.fromCharCode(cp);
} else {
cp -= 0x10000;
s += String.fromCharCode((cp >> 10) + 0xd800, (cp & 0x3ff) + 0xdc00);
}
}
return s;
}
//
// Implementation of Encoding specification
// https://encoding.spec.whatwg.org/
//
//
// 4. Terminology
//
/**
* An ASCII byte is a byte in the range 0x00 to 0x7F, inclusive.
* @param {number} a The number to test.
* @return {boolean} True if a is in the range 0x00 to 0x7F, inclusive.
*/
function isASCIIByte(a) {
return 0x00 <= a && a <= 0x7f;
}
/**
* An ASCII code point is a code point in the range U+0000 to
* U+007F, inclusive.
*/
var isASCIICodePoint = isASCIIByte;
/**
* End-of-stream is a special token that signifies no more tokens
* are in the stream.
* @const
*/ var end_of_stream = -1;
/**
* A stream represents an ordered sequence of tokens.
*
* @constructor
* @param {!(Array.<number>|Uint8Array)} tokens Array of tokens that provide
* the stream.
*/
function Stream(tokens) {
/** @type {!Array.<number>} */
this.tokens = [].slice.call(tokens);
// Reversed as push/pop is more efficient than shift/unshift.
this.tokens.reverse();
}
Stream.prototype = {
/**
* @return {boolean} True if end-of-stream has been hit.
*/
endOfStream: function () {
return !this.tokens.length;
},
/**
* When a token is read from a stream, the first token in the
* stream must be returned and subsequently removed, and
* end-of-stream must be returned otherwise.
*
* @return {number} Get the next token from the stream, or
* end_of_stream.
*/
read: function () {
if (!this.tokens.length) return end_of_stream;
return this.tokens.pop();
},
/**
* When one or more tokens are prepended to a stream, those tokens
* must be inserted, in given order, before the first token in the
* stream.
*
* @param {(number|!Array.<number>)} token The token(s) to prepend to the
* stream.
*/
prepend: function (token) {
if (Array.isArray(token)) {
var tokens = /**@type {!Array.<number>}*/ token;
while (tokens.length) this.tokens.push(tokens.pop());
} else {
this.tokens.push(token);
}
},
/**
* When one or more tokens are pushed to a stream, those tokens
* must be inserted, in given order, after the last token in the
* stream.
*
* @param {(number|!Array.<number>)} token The tokens(s) to push to the
* stream.
*/
push: function (token) {
if (Array.isArray(token)) {
var tokens = /**@type {!Array.<number>}*/ token;
while (tokens.length) this.tokens.unshift(tokens.shift());
} else {
this.tokens.unshift(token);
}
}
};
//
// 5. Encodings
//
// 5.1 Encoders and decoders
/** @const */
var finished = -1;
/**
* @param {boolean} fatal If true, decoding errors raise an exception.
* @param {number=} opt_code_point Override the standard fallback code point.
* @return {number} The code point to insert on a decoding error.
*/
function decoderError(fatal, opt_code_point) {
if (fatal) throw TypeError('Decoder error');
return opt_code_point || 0xfffd;
}
/**
* @param {number} code_point The code point that could not be encoded.
* @return {number} Always throws, no value is actually returned.
*/
function encoderError(code_point) {
throw TypeError('The code point ' + code_point + ' could not be encoded.');
}
/** @interface */
function Decoder() {}
Decoder.prototype = {
/**
* @param {Stream} stream The stream of bytes being decoded.
* @param {number} bite The next byte read from the stream.
* @return {?(number|!Array.<number>)} The next code point(s)
* decoded, or null if not enough data exists in the input
* stream to decode a complete code point, or |finished|.
*/
handler: function (stream, bite) {}
};
/** @interface */
function Encoder() {}
Encoder.prototype = {
/**
* @param {Stream} stream The stream of code points being encoded.
* @param {number} code_point Next code point read from the stream.
* @return {(number|!Array.<number>)} Byte(s) to emit, or |finished|.
*/
handler: function (stream, code_point) {}
};
// 5.2 Names and labels
// TODO: Define @typedef for Encoding: {name:string,labels:Array.<string>}
// https://github.com/google/closure-compiler/issues/247
/**
* @param {string} label The encoding label.
* @return {?{name:string,labels:Array.<string>}}
*/
function getEncoding(label) {
// 1. Remove any leading and trailing ASCII whitespace from label.
label = String(label).trim().toLowerCase();
// 2. If label is an ASCII case-insensitive match for any of the
// labels listed in the table below, return the corresponding
// encoding, and failure otherwise.
if (Object.prototype.hasOwnProperty.call(label_to_encoding, label)) {
return label_to_encoding[label];
}
return null;
}
/**
* Encodings table: https://encoding.spec.whatwg.org/encodings.json
* @const
* @type {!Array.<{
* heading: string,
* encodings: Array.<{name:string,labels:Array.<string>}>
* }>}
*/
var encodings = [
{
encodings: [
{
labels: ['unicode-1-1-utf-8', 'utf-8', 'utf8'],
name: 'UTF-8'
}
],
heading: 'The Encoding'
},
{
encodings: [
{
labels: ['866', 'cp866', 'csibm866', 'ibm866'],
name: 'IBM866'
},
{
labels: [
'csisolatin2',
'iso-8859-2',
'iso-ir-101',
'iso8859-2',
'iso88592',
'iso_8859-2',
'iso_8859-2:1987',
'l2',
'latin2'
],
name: 'ISO-8859-2'
},
{
labels: [
'csisolatin3',
'iso-8859-3',
'iso-ir-109',
'iso8859-3',
'iso88593',
'iso_8859-3',
'iso_8859-3:1988',
'l3',
'latin3'
],
name: 'ISO-8859-3'
},
{
labels: [
'csisolatin4',
'iso-8859-4',
'iso-ir-110',
'iso8859-4',
'iso88594',
'iso_8859-4',
'iso_8859-4:1988',
'l4',
'latin4'
],
name: 'ISO-8859-4'
},
{
labels: [
'csisolatincyrillic',
'cyrillic',
'iso-8859-5',
'iso-ir-144',
'iso8859-5',
'iso88595',
'iso_8859-5',
'iso_8859-5:1988'
],
name: 'ISO-8859-5'
},
{
labels: [
'arabic',
'asmo-708',
'csiso88596e',
'csiso88596i',
'csisolatinarabic',
'ecma-114',
'iso-8859-6',
'iso-8859-6-e',
'iso-8859-6-i',
'iso-ir-127',
'iso8859-6',
'iso88596',
'iso_8859-6',
'iso_8859-6:1987'
],
name: 'ISO-8859-6'
},
{
labels: [
'csisolatingreek',
'ecma-118',
'elot_928',
'greek',
'greek8',
'iso-8859-7',
'iso-ir-126',
'iso8859-7',
'iso88597',
'iso_8859-7',
'iso_8859-7:1987',
'sun_eu_greek'
],
name: 'ISO-8859-7'
},
{
labels: [
'csiso88598e',
'csisolatinhebrew',
'hebrew',
'iso-8859-8',
'iso-8859-8-e',
'iso-ir-138',
'iso8859-8',
'iso88598',
'iso_8859-8',
'iso_8859-8:1988',
'visual'
],
name: 'ISO-8859-8'
},
{
labels: ['csiso88598i', 'iso-8859-8-i', 'logical'],
name: 'ISO-8859-8-I'
},
{
labels: [
'csisolatin6',
'iso-8859-10',
'iso-ir-157',
'iso8859-10',
'iso885910',
'l6',
'latin6'
],
name: 'ISO-8859-10'
},
{
labels: ['iso-8859-13', 'iso8859-13', 'iso885913'],
name: 'ISO-8859-13'
},
{
labels: ['iso-8859-14', 'iso8859-14', 'iso885914'],
name: 'ISO-8859-14'
},
{
labels: ['csisolatin9', 'iso-8859-15', 'iso8859-15', 'iso885915', 'iso_8859-15', 'l9'],
name: 'ISO-8859-15'
},
{
labels: ['iso-8859-16'],
name: 'ISO-8859-16'
},
{
labels: ['cskoi8r', 'koi', 'koi8', 'koi8-r', 'koi8_r'],
name: 'KOI8-R'
},
{
labels: ['koi8-ru', 'koi8-u'],
name: 'KOI8-U'
},
{
labels: ['csmacintosh', 'mac', 'macintosh', 'x-mac-roman'],
name: 'macintosh'
},
{
labels: ['dos-874', 'iso-8859-11', 'iso8859-11', 'iso885911', 'tis-620', 'windows-874'],
name: 'windows-874'
},
{
labels: ['cp1250', 'windows-1250', 'x-cp1250'],
name: 'windows-1250'
},
{
labels: ['cp1251', 'windows-1251', 'x-cp1251'],
name: 'windows-1251'
},
{
labels: [
'ansi_x3.4-1968',
'ascii',
'cp1252',
'cp819',
'csisolatin1',
'ibm819',
'iso-8859-1',
'iso-ir-100',
'iso8859-1',
'iso88591',
'iso_8859-1',
'iso_8859-1:1987',
'l1',
'latin1',
'us-ascii',
'windows-1252',
'x-cp1252'
],
name: 'windows-1252'
},
{
labels: ['cp1253', 'windows-1253', 'x-cp1253'],
name: 'windows-1253'
},
{
labels: [
'cp1254',
'csisolatin5',
'iso-8859-9',
'iso-ir-148',
'iso8859-9',
'iso88599',
'iso_8859-9',
'iso_8859-9:1989',
'l5',
'latin5',
'windows-1254',
'x-cp1254'
],
name: 'windows-1254'
},
{
labels: ['cp1255', 'windows-1255', 'x-cp1255'],
name: 'windows-1255'
},
{
labels: ['cp1256', 'windows-1256', 'x-cp1256'],
name: 'windows-1256'
},
{
labels: ['cp1257', 'windows-1257', 'x-cp1257'],
name: 'windows-1257'
},
{
labels: ['cp1258', 'windows-1258', 'x-cp1258'],
name: 'windows-1258'
},
{
labels: ['x-mac-cyrillic', 'x-mac-ukrainian'],
name: 'x-mac-cyrillic'
}
],
heading: 'Legacy single-byte encodings'
},
{
encodings: [
{
labels: [
'chinese',
'csgb2312',
'csiso58gb231280',
'gb2312',
'gb_2312',
'gb_2312-80',
'gbk',
'iso-ir-58',
'x-gbk'
],
name: 'GBK'
},
{
labels: ['gb18030'],
name: 'gb18030'
}
],
heading: 'Legacy multi-byte Chinese (simplified) encodings'
},
{
encodings: [
{
labels: ['big5', 'big5-hkscs', 'cn-big5', 'csbig5', 'x-x-big5'],
name: 'Big5'
}
],
heading: 'Legacy multi-byte Chinese (traditional) encodings'
},
{
encodings: [
{
labels: ['cseucpkdfmtjapanese', 'euc-jp', 'x-euc-jp'],
name: 'EUC-JP'
},
{
labels: ['csiso2022jp', 'iso-2022-jp'],
name: 'ISO-2022-JP'
},
{
labels: [
'csshiftjis',
'ms932',
'ms_kanji',
'shift-jis',
'shift_jis',
'sjis',
'windows-31j',
'x-sjis'
],
name: 'Shift_JIS'
}
],
heading: 'Legacy multi-byte Japanese encodings'
},
{
encodings: [
{
labels: [
'cseuckr',
'csksc56011987',
'euc-kr',
'iso-ir-149',
'korean',
'ks_c_5601-1987',
'ks_c_5601-1989',
'ksc5601',
'ksc_5601',
'windows-949'
],
name: 'EUC-KR'
}
],
heading: 'Legacy multi-byte Korean encodings'
},
{
encodings: [
{
labels: ['csiso2022kr', 'hz-gb-2312', 'iso-2022-cn', 'iso-2022-cn-ext', 'iso-2022-kr'],
name: 'replacement'
},
{
labels: ['utf-16be'],
name: 'UTF-16BE'
},
{
labels: ['utf-16', 'utf-16le'],
name: 'UTF-16LE'
},
{
labels: ['x-user-defined'],
name: 'x-user-defined'
}
],
heading: 'Legacy miscellaneous encodings'
}
];
// Label to encoding registry.
/** @type {Object.<string,{name:string,labels:Array.<string>}>} */
var label_to_encoding = {};
encodings.forEach(function (category) {
category.encodings.forEach(function (encoding) {
encoding.labels.forEach(function (label) {
label_to_encoding[label] = encoding;
});
});
});
// Registry of of encoder/decoder factories, by encoding name.
/** @type {Object.<string, function({fatal:boolean}): Encoder>} */
var encoders = {};
/** @type {Object.<string, function({fatal:boolean}): Decoder>} */
var decoders = {};
//
// 6. Indexes
//
/**
* @param {number} pointer The |pointer| to search for.
* @param {(!Array.<?number>|undefined)} index The |index| to search within.
* @return {?number} The code point corresponding to |pointer| in |index|,
* or null if |code point| is not in |index|.
*/
function indexCodePointFor(pointer, index) {
if (!index) return null;
return index[pointer] || null;
}
/**
* @param {number} code_point The |code point| to search for.
* @param {!Array.<?number>} index The |index| to search within.
* @return {?number} The first pointer corresponding to |code point| in
* |index|, or null if |code point| is not in |index|.
*/
function indexPointerFor(code_point, index) {
var pointer = index.indexOf(code_point);
return pointer === -1 ? null : pointer;
}
/**
* @param {string} name Name of the index.
* @return {(!Array.<number>|!Array.<Array.<number>>)}
* */
function index(name) {
if (!('encoding-indexes' in globalThis)) {
throw Error('Indexes missing.' + ' Did you forget to include encoding-indexes.js first?');
}
return globalThis['encoding-indexes'][name];
}
/**
* @param {number} pointer The |pointer| to search for in the gb18030 index.
* @return {?number} The code point corresponding to |pointer| in |index|,
* or null if |code point| is not in the gb18030 index.
*/
function indexGB18030RangesCodePointFor(pointer) {
// 1. If pointer is greater than 39419 and less than 189000, or
// pointer is greater than 1237575, return null.
if ((pointer > 39419 && pointer < 189000) || pointer > 1237575) return null;
// 2. If pointer is 7457, return code point U+E7C7.
if (pointer === 7457) return 0xe7c7;
// 3. Let offset be the last pointer in index gb18030 ranges that
// is equal to or less than pointer and let code point offset be
// its corresponding code point.
var offset = 0;
var code_point_offset = 0;
var idx = index('gb18030-ranges');
var i;
for (i = 0; i < idx.length; ++i) {
/** @type {!Array.<number>} */
var entry = idx[i];
if (entry[0] <= pointer) {
offset = entry[0];
code_point_offset = entry[1];
} else {
break;
}
}
// 4. Return a code point whose value is code point offset +
// pointer − offset.
return code_point_offset + pointer - offset;
}
/**
* @param {number} code_point The |code point| to locate in the gb18030 index.
* @return {number} The first pointer corresponding to |code point| in the
* gb18030 index.
*/
function indexGB18030RangesPointerFor(code_point) {
// 1. If code point is U+E7C7, return pointer 7457.
if (code_point === 0xe7c7) return 7457;
// 2. Let offset be the last code point in index gb18030 ranges
// that is equal to or less than code point and let pointer offset
// be its corresponding pointer.
var offset = 0;
var pointer_offset = 0;
var idx = index('gb18030-ranges');
var i;
for (i = 0; i < idx.length; ++i) {
/** @type {!Array.<number>} */
var entry = idx[i];
if (entry[1] <= code_point) {
offset = entry[1];
pointer_offset = entry[0];
} else {
break;
}
}
// 3. Return a pointer whose value is pointer offset + code point
// − offset.
return pointer_offset + code_point - offset;
}
/**
* @param {number} code_point The |code_point| to search for in the Shift_JIS
* index.
* @return {?number} The code point corresponding to |pointer| in |index|,
* or null if |code point| is not in the Shift_JIS index.
*/
function indexShiftJISPointerFor(code_point) {
// 1. Let index be index jis0208 excluding all entries whose
// pointer is in the range 8272 to 8835, inclusive.
shift_jis_index =
shift_jis_index ||
index('jis0208').map(function (code_point, pointer) {
return inRange(pointer, 8272, 8835) ? null : code_point;
});
var index_ = shift_jis_index;
// 2. Return the index pointer for code point in index.
return index_.indexOf(code_point);
}
var shift_jis_index;
/**
* @param {number} code_point The |code_point| to search for in the big5
* index.
* @return {?number} The code point corresponding to |pointer| in |index|,
* or null if |code point| is not in the big5 index.
*/
function indexBig5PointerFor(code_point) {
// 1. Let index be index Big5 excluding all entries whose pointer
big5_index_no_hkscs =
big5_index_no_hkscs ||
index('big5').map(function (code_point, pointer) {
return pointer < (0xa1 - 0x81) * 157 ? null : code_point;
});
var index_ = big5_index_no_hkscs;
// 2. If code point is U+2550, U+255E, U+2561, U+256A, U+5341, or
// U+5345, return the last pointer corresponding to code point in
// index.
if (
code_point === 0x2550 ||
code_point === 0x255e ||
code_point === 0x2561 ||
code_point === 0x256a ||
code_point === 0x5341 ||
code_point === 0x5345
) {
return index_.lastIndexOf(code_point);
}
// 3. Return the index pointer for code point in index.
return indexPointerFor(code_point, index_);
}
var big5_index_no_hkscs;
//
// 8. API
//
/** @const */ var DEFAULT_ENCODING = 'utf-8';
// 8.1 Interface TextDecoder
/**
* @constructor
* @param {string=} label The label of the encoding;
* defaults to 'utf-8'.
* @param {Object=} options
*/
function TextDecoder(label, options) {
// Web IDL conventions
if (!(this instanceof TextDecoder))
throw TypeError("Called as a function. Did you forget 'new'?");
label = label !== undefined ? String(label) : DEFAULT_ENCODING;
options = ToDictionary(options);
// A TextDecoder object has an associated encoding, decoder,
// stream, ignore BOM flag (initially unset), BOM seen flag
// (initially unset), error mode (initially replacement), and do
// not flush flag (initially unset).
/** @private */
this._encoding = null;
/** @private @type {?Decoder} */
this._decoder = null;
/** @private @type {boolean} */
this._ignoreBOM = false;
/** @private @type {boolean} */
this._BOMseen = false;
/** @private @type {string} */
this._error_mode = 'replacement';
/** @private @type {boolean} */
this._do_not_flush = false;
// 1. Let encoding be the result of getting an encoding from
// label.
var encoding = getEncoding(label);
// 2. If encoding is failure or replacement, throw a RangeError.
if (encoding === null || encoding.name === 'replacement')
throw RangeError('Unknown encoding: ' + label);
if (!decoders[encoding.name]) {
throw Error('Decoder not present.' + ' Did you forget to include encoding-indexes.js first?');
}
// 3. Let dec be a new TextDecoder object.
var dec = this;
// 4. Set dec's encoding to encoding.
dec._encoding = encoding;
// 5. If options's fatal member is true, set dec's error mode to
// fatal.
if (Boolean(options['fatal'])) dec._error_mode = 'fatal';
// 6. If options's ignoreBOM member is true, set dec's ignore BOM
// flag.
if (Boolean(options['ignoreBOM'])) dec._ignoreBOM = true;
// For pre-ES5 runtimes:
if (!Object.defineProperty) {
this.encoding = dec._encoding.name.toLowerCase();
this.fatal = dec._error_mode === 'fatal';
this.ignoreBOM = dec._ignoreBOM;
}
// 7. Return dec.
return dec;
}
if (Object.defineProperty) {
// The encoding attribute's getter must return encoding's name.
Object.defineProperty(TextDecoder.prototype, 'encoding', {
/** @this {TextDecoder} */
get: function () {
return this._encoding.name.toLowerCase();
}
});
// The fatal attribute's getter must return true if error mode
// is fatal, and false otherwise.
Object.defineProperty(TextDecoder.prototype, 'fatal', {
/** @this {TextDecoder} */
get: function () {
return this._error_mode === 'fatal';
}
});
// The ignoreBOM attribute's getter must return true if ignore
// BOM flag is set, and false otherwise.
Object.defineProperty(TextDecoder.prototype, 'ignoreBOM', {
/** @this {TextDecoder} */
get: function () {
return this._ignoreBOM;
}
});
}
/**
* @param {BufferSource=} input The buffer of bytes to decode.
* @param {Object=} options
* @return {string} The decoded string.
*/
TextDecoder.prototype.decode = function decode(input, options) {
var bytes;
if (typeof input === 'object' && input instanceof ArrayBuffer) {
bytes = new Uint8Array(input);
} else if (
typeof input === 'object' &&
'buffer' in input &&
input.buffer instanceof ArrayBuffer
) {
bytes = new Uint8Array(input.buffer, input.byteOffset, input.byteLength);
} else {
bytes = new Uint8Array(0);
}
options = ToDictionary(options);
// 1. If the do not flush flag is unset, set decoder to a new
// encoding's decoder, set stream to a new stream, and unset the
// BOM seen flag.
if (!this._do_not_flush) {
this._decoder = decoders[this._encoding.name]({
fatal: this._error_mode === 'fatal'
});
this._BOMseen = false;
}
// 2. If options's stream is true, set the do not flush flag, and
// unset the do not flush flag otherwise.
this._do_not_flush = Boolean(options['stream']);
// 3. If input is given, push a copy of input to stream.
// TODO: Align with spec algorithm - maintain stream on instance.
var input_stream = new Stream(bytes);
// 4. Let output be a new stream.
var output = [];
/** @type {?(number|!Array.<number>)} */
var result;
// 5. While true:
while (true) {
// 1. Let token be the result of reading from stream.
var token = input_stream.read();
// 2. If token is end-of-stream and the do not flush flag is
// set, return output, serialized.
// TODO: Align with spec algorithm.
if (token === end_of_stream) break;
// 3. Otherwise, run these subsubsteps:
// 1. Let result be the result of processing token for decoder,
// stream, output, and error mode.
result = this._decoder.handler(input_stream, token);
// 2. If result is finished, return output, serialized.
if (result === finished) break;
if (result !== null) {
if (Array.isArray(result)) output.push.apply(output, /**@type {!Array.<number>}*/ result);
else output.push(result);
}
// 3. Otherwise, if result is error, throw a TypeError.
// (Thrown in handler)
// 4. Otherwise, do nothing.
}
// TODO: Align with spec algorithm.
if (!this._do_not_flush) {
do {
result = this._decoder.handler(input_stream, input_stream.read());
if (result === finished) break;
if (result === null) continue;
if (Array.isArray(result)) output.push.apply(output, /**@type {!Array.<number>}*/ result);
else output.push(result);
} while (!input_stream.endOfStream());
this._decoder = null;
}
// A TextDecoder object also has an associated serialize stream
// algorithm...
/**
* @param {!Array.<number>} stream
* @return {string}
* @this {TextDecoder}
*/
function serializeStream(stream) {
// 1. Let token be the result of reading from stream.
// (Done in-place on array, rather than as a stream)
// 2. If encoding is UTF-8, UTF-16BE, or UTF-16LE, and ignore
// BOM flag and BOM seen flag are unset, run these subsubsteps:
if (
includes(['UTF-8', 'UTF-16LE', 'UTF-16BE'], this._encoding.name) &&
!this._ignoreBOM &&
!this._BOMseen
) {
if (stream.length > 0 && stream[0] === 0xfeff) {
// 1. If token is U+FEFF, set BOM seen flag.
this._BOMseen = true;
stream.shift();
} else if (stream.length > 0) {
// 2. Otherwise, if token is not end-of-stream, set BOM seen
// flag and append token to stream.
this._BOMseen = true;
} else {
// 3. Otherwise, if token is not end-of-stream, append token
// to output.
// (no-op)
}
}
// 4. Otherwise, return output.
return codePointsToString(stream);
}
return serializeStream.call(this, output);
};
// 8.2 Interface TextEncoder
/**
* @constructor
* @param {string=} label The label of the encoding. NONSTANDARD.
* @param {Object=} options NONSTANDARD.
*/
function TextEncoder(label, options) {
// Web IDL conventions
if (!(this instanceof TextEncoder))
throw TypeError("Called as a function. Did you forget 'new'?");
options = ToDictionary(options);
// A TextEncoder object has an associated encoding and encoder.
/** @private */
this._encoding = null;
/** @private @type {?Encoder} */
this._encoder = null;
// Non-standard
/** @private @type {boolean} */
this._do_not_flush = false;
/** @private @type {string} */
this._fatal = Boolean(options['fatal']) ? 'fatal' : 'replacement';
// 1. Let enc be a new TextEncoder object.
var enc = this;
// 2. Set enc's encoding to UTF-8's encoder.
if (Boolean(options['NONSTANDARD_allowLegacyEncoding'])) {
// NONSTANDARD behavior.
label = label !== undefined ? String(label) : DEFAULT_ENCODING;
var encoding = getEncoding(label);
if (encoding === null || encoding.name === 'replacement')
throw RangeError('Unknown encoding: ' + label);
if (!encoders[encoding.name]) {
throw Error('Encoder not present.' + ' Did you forget to include encoding-indexes.js first?');
}
enc._encoding = encoding;
} else {
// Standard behavior.
enc._encoding = getEncoding('utf-8');
if (label !== undefined && 'console' in globalThis) {
console.warn('TextEncoder constructor called with encoding label, ' + 'which is ignored.');
}
}
// For pre-ES5 runtimes:
if (!Object.defineProperty) this.encoding = enc._encoding.name.toLowerCase();
// 3. Return enc.
return enc;
}
if (Object.defineProperty) {
// The encoding attribute's getter must return encoding's name.
Object.defineProperty(TextEncoder.prototype, 'encoding', {
/** @this {TextEncoder} */
get: function () {
return this._encoding.name.toLowerCase();
}
});
}
/**
* @param {string=} opt_string The string to encode.
* @param {Object=} options
* @return {!Uint8Array} Encoded bytes, as a Uint8Array.
*/
TextEncoder.prototype.encode = function encode(opt_string, options) {
opt_string = opt_string === undefined ? '' : String(opt_string);
options = ToDictionary(options);
// NOTE: This option is nonstandard. None of the encodings
// permitted for encoding (i.e. UTF-8, UTF-16) are stateful when
// the input is a USVString so streaming is not necessary.
if (!this._do_not_flush)
this._encoder = encoders[this._encoding.name]({
fatal: this._fatal === 'fatal'
});
this._do_not_flush = Boolean(options['stream']);
// 1. Convert input to a stream.
var input = new Stream(stringToCodePoints(opt_string));
// 2. Let output be a new stream
var output = [];
/** @type {?(number|!Array.<number>)} */
var result;
// 3. While true, run these substeps:
while (true) {
// 1. Let token be the result of reading from input.
var token = input.read();
if (token === end_of_stream) break;
// 2. Let result be the result of processing token for encoder,
// input, output.
result = this._encoder.handler(input, token);
if (result === finished) break;
if (Array.isArray(result)) output.push.apply(output, /**@type {!Array.<number>}*/ result);
else output.push(result);
}
// TODO: Align with spec algorithm.
if (!this._do_not_flush) {
while (true) {
result = this._encoder.handler(input, input.read());
if (result === finished) break;
if (Array.isArray(result)) output.push.apply(output, /**@type {!Array.<number>}*/ result);
else output.push(result);
}
this._encoder = null;
}
// 3. If result is finished, convert output into a byte sequence,
// and then return a Uint8Array object wrapping an ArrayBuffer
// containing output.
return new Uint8Array(output);
};
//
// 9. The encoding
//
// 9.1 utf-8
// 9.1.1 utf-8 decoder
/**
* @constructor
* @implements {Decoder}
* @param {{fatal: boolean}} options
*/
function UTF8Decoder(options) {
var fatal = options.fatal;
// utf-8's decoder's has an associated utf-8 code point, utf-8
// bytes seen, and utf-8 bytes needed (all initially 0), a utf-8
// lower boundary (initially 0x80), and a utf-8 upper boundary
// (initially 0xBF).
var /** @type {number} */ utf8_code_point = 0,
/** @type {number} */ utf8_bytes_seen = 0,
/** @type {number} */ utf8_bytes_needed = 0,
/** @type {number} */ utf8_lower_boundary = 0x80,
/** @type {number} */ utf8_upper_boundary = 0xbf;
/**
* @param {Stream} stream The stream of bytes being decoded.
* @param {number} bite The next byte read from the stream.
* @return {?(number|!Array.<number>)} The next code point(s)
* decoded, or null if not enough data exists in the input
* stream to decode a complete code point.
*/
this.handler = function (stream, bite) {
// 1. If byte is end-of-stream and utf-8 bytes needed is not 0,
// set utf-8 bytes needed to 0 and return error.
if (bite === end_of_stream && utf8_bytes_needed !== 0) {
utf8_bytes_needed = 0;
return decoderError(fatal);
}
// 2. If byte is end-of-stream, return finished.
if (bite === end_of_stream) return finished;
// 3. If utf-8 bytes needed is 0, based on byte:
if (utf8_bytes_needed === 0) {
// 0x00 to 0x7F
if (inRange(bite, 0x00, 0x7f)) {
// Return a code point whose value is byte.
return bite;
}
// 0xC2 to 0xDF
else if (inRange(bite, 0xc2, 0xdf)) {
// 1. Set utf-8 bytes needed to 1.
utf8_bytes_needed = 1;
// 2. Set UTF-8 code point to byte & 0x1F.
utf8_code_point = bite & 0x1f;
}
// 0xE0 to 0xEF
else if (inRange(bite, 0xe0, 0xef)) {
// 1. If byte is 0xE0, set utf-8 lower boundary to 0xA0.
if (bite === 0xe0) utf8_lower_boundary = 0xa0;
// 2. If byte is 0xED, set utf-8 upper boundary to 0x9F.
if (bite === 0xed) utf8_upper_boundary = 0x9f;
// 3. Set utf-8 bytes needed to 2.
utf8_bytes_needed = 2;
// 4. Set UTF-8 code point to byte & 0xF.
utf8_code_point = bite & 0xf;
}
// 0xF0 to 0xF4
else if (inRange(bite, 0xf0, 0xf4)) {
// 1. If byte is 0xF0, set utf-8 lower boundary to 0x90.
if (bite === 0xf0) utf8_lower_boundary = 0x90;
// 2. If byte is 0xF4, set utf-8 upper boundary to 0x8F.
if (bite === 0xf4) utf8_upper_boundary = 0x8f;
// 3. Set utf-8 bytes needed to 3.
utf8_bytes_needed = 3;
// 4. Set UTF-8 code point to byte & 0x7.
utf8_code_point = bite & 0x7;
}
// Otherwise
else {
// Return error.
return decoderError(fatal);
}
// Return continue.
return null;
}
// 4. If byte is not in the range utf-8 lower boundary to utf-8
// upper boundary, inclusive, run these substeps:
if (!inRange(bite, utf8_lower_boundary, utf8_upper_boundary)) {
// 1. Set utf-8 code point, utf-8 bytes needed, and utf-8
// bytes seen to 0, set utf-8 lower boundary to 0x80, and set
// utf-8 upper boundary to 0xBF.
utf8_code_point = utf8_bytes_needed = utf8_bytes_seen = 0;
utf8_lower_boundary = 0x80;
utf8_upper_boundary = 0xbf;
// 2. Prepend byte to stream.
stream.prepend(bite);
// 3. Return error.
return decoderError(fatal);
}
// 5. Set utf-8 lower boundary to 0x80 and utf-8 upper boundary
// to 0xBF.
utf8_lower_boundary = 0x80;
utf8_upper_boundary = 0xbf;
// 6. Set UTF-8 code point to (UTF-8 code point << 6) | (byte &
// 0x3F)
utf8_code_point = (utf8_code_point << 6) | (bite & 0x3f);
// 7. Increase utf-8 bytes seen by one.
utf8_bytes_seen += 1;
// 8. If utf-8 bytes seen is not equal to utf-8 bytes needed,
// continue.
if (utf8_bytes_seen !== utf8_bytes_needed) return null;
// 9. Let code point be utf-8 code point.
var code_point = utf8_code_point;
// 10. Set utf-8 code point, utf-8 bytes needed, and utf-8 bytes
// seen to 0.
utf8_code_point = utf8_bytes_needed = utf8_bytes_seen = 0;
// 11. Return a code point whose value is code point.
return code_point;
};
}
// 9.1.2 utf-8 encoder
/**
* @constructor
* @implements {Encoder}
* @param {{fatal: boolean}} options
*/
function UTF8Encoder(options) {
var fatal = options.fatal;
/**
* @param {Stream} stream Input stream.
* @param {number} code_point Next code point read from the stream.
* @return {(number|!Array.<number>)} Byte(s) to emit.
*/
this.handler = function (stream, code_point) {
// 1. If code point is end-of-stream, return finished.
if (code_point === end_of_stream) return finished;
// 2. If code point is an ASCII code point, return a byte whose
// value is code point.
if (isASCIICodePoint(code_point)) return code_point;
// 3. Set count and offset based on the range code point is in:
var count, offset;
// U+0080 to U+07FF, inclusive:
if (inRange(code_point, 0x0080, 0x07ff)) {
// 1 and 0xC0
count = 1;
offset = 0xc0;
}
// U+0800 to U+FFFF, inclusive:
else if (inRange(code_point, 0x0800, 0xffff)) {
// 2 and 0xE0
count = 2;
offset = 0xe0;
}
// U+10000 to U+10FFFF, inclusive:
else if (inRange(code_point, 0x10000, 0x10ffff)) {
// 3 and 0xF0
count = 3;
offset = 0xf0;
}
// 4. Let bytes be a byte sequence whose first byte is (code
// point >> (6 × count)) + offset.
var bytes = [(code_point >> (6 * count)) + offset];
// 5. Run these substeps while count is greater than 0:
while (count > 0) {
// 1. Set temp to code point >> (6 × (count − 1)).
var temp = code_point >> (6 * (count - 1));
// 2. Append to bytes 0x80 | (temp & 0x3F).
bytes.push(0x80 | (temp & 0x3f));
// 3. Decrease count by one.
count -= 1;
}
// 6. Return bytes bytes, in order.
return bytes;
};
}
/** @param {{fatal: boolean}} options */
encoders['UTF-8'] = function (options) {
return new UTF8Encoder(options);
};
/** @param {{fatal: boolean}} options */
decoders['UTF-8'] = function (options) {
return new UTF8Decoder(options);
};
//
// 10. Legacy single-byte encodings
//
// 10.1 single-byte decoder
/**
* @constructor
* @implements {Decoder}
* @param {!Array.<number>} index The encoding index.
* @param {{fatal: boolean}} options
*/
function SingleByteDecoder(index, options) {
var fatal = options.fatal;
/**
* @param {Stream} stream The stream of bytes being decoded.
* @param {number} bite The next byte read from the stream.
* @return {?(number|!Array.<number>)} The next code point(s)
* decoded, or null if not enough data exists in the input
* stream to decode a complete code point.
*/
this.handler = function (stream, bite) {
// 1. If byte is end-of-stream, return finished.
if (bite === end_of_stream) return finished;
// 2. If byte is an ASCII byte, return a code point whose value
// is byte.
if (isASCIIByte(bite)) return bite;
// 3. Let code point be the index code point for byte − 0x80 in
// index single-byte.
var code_point = index[bite - 0x80];
// 4. If code point is null, return error.
if (code_point === null) return decoderError(fatal);
// 5. Return a code point whose value is code point.
return code_point;
};
}
// 10.2 single-byte encoder
/**
* @constructor
* @implements {Encoder}
* @param {!Array.<?number>} index The encoding index.
* @param {{fatal: boolean}} options
*/
function SingleByteEncoder(index, options) {
var fatal = options.fatal;
/**
* @param {Stream} stream Input stream.
* @param {number} code_point Next code point read from the stream.
* @return {(number|!Array.<number>)} Byte(s) to emit.
*/
this.handler = function (stream, code_point) {
// 1. If code point is end-of-stream, return finished.
if (code_point === end_of_stream) return finished;
// 2. If code point is an ASCII code point, return a byte whose
// value is code point.
if (isASCIICodePoint(code_point)) return code_point;
// 3. Let pointer be the index pointer for code point in index
// single-byte.
var pointer = indexPointerFor(code_point, index);
// 4. If pointer is null, return error with code point.
if (pointer === null) encoderError(code_point);
// 5. Return a byte whose value is pointer + 0x80.
return pointer + 0x80;
};
}
(function () {
if (!('encoding-indexes' in globalThis)) return;
encodings.forEach(function (category) {
if (category.heading !== 'Legacy single-byte encodings') return;
category.encodings.forEach(function (encoding) {
var name = encoding.name;
var idx = index(name.toLowerCase());
/** @param {{fatal: boolean}} options */
decoders[name] = function (options) {
return new SingleByteDecoder(idx, options);
};
/** @param {{fatal: boolean}} options */
encoders[name] = function (options) {
return new SingleByteEncoder(idx, options);
};
});
});
})();
//
// 11. Legacy multi-byte Chinese (simplified) encodings
//
// 11.1 gbk
// 11.1.1 gbk decoder
// gbk's decoder is gb18030's decoder.
/** @param {{fatal: boolean}} options */
decoders['GBK'] = function (options) {
return new GB18030Decoder(options);
};
// 11.1.2 gbk encoder
// gbk's encoder is gb18030's encoder with its gbk flag set.
/** @param {{fatal: boolean}} options */
encoders['GBK'] = function (options) {
return new GB18030Encoder(options, true);
};
// 11.2 gb18030
// 11.2.1 gb18030 decoder
/**
* @constructor
* @implements {Decoder}
* @param {{fatal: boolean}} options
*/
function GB18030Decoder(options) {
var fatal = options.fatal;
// gb18030's decoder has an associated gb18030 first, gb18030
// second, and gb18030 third (all initially 0x00).
var /** @type {number} */ gb18030_first = 0x00,
/** @type {number} */ gb18030_second = 0x00,
/** @type {number} */ gb18030_third = 0x00;
/**
* @param {Stream} stream The stream of bytes being decoded.
* @param {number} bite The next byte read from the stream.
* @return {?(number|!Array.<number>)} The next code point(s)
* decoded, or null if not enough data exists in the input
* stream to decode a complete code point.
*/
this.handler = function (stream, bite) {
// 1. If byte is end-of-stream and gb18030 first, gb18030
// second, and gb18030 third are 0x00, return finished.
if (
bite === end_of_stream &&
gb18030_first === 0x00 &&
gb18030_second === 0x00 &&
gb18030_third === 0x00
) {
return finished;
}
// 2. If byte is end-of-stream, and gb18030 first, gb18030
// second, or gb18030 third is not 0x00, set gb18030 first,
// gb18030 second, and gb18030 third to 0x00, and return error.
if (
bite === end_of_stream &&
(gb18030_first !== 0x00 || gb18030_second !== 0x00 || gb18030_third !== 0x00)
) {
gb18030_first = 0x00;
gb18030_second = 0x00;
gb18030_third = 0x00;
decoderError(fatal);
}
var code_point;
// 3. If gb18030 third is not 0x00, run these substeps:
if (gb18030_third !== 0x00) {
// 1. Let code point be null.
code_point = null;
// 2. If byte is in the range 0x30 to 0x39, inclusive, set
// code point to the index gb18030 ranges code point for
// (((gb18030 first − 0x81) × 10 + gb18030 second − 0x30) ×
// 126 + gb18030 third − 0x81) × 10 + byte − 0x30.
if (inRange(bite, 0x30, 0x39)) {
code_point = indexGB18030RangesCodePointFor(
(((gb18030_first - 0x81) * 10 + gb18030_second - 0x30) * 126 + gb18030_third - 0x81) *
10 +
bite -
0x30
);
}
// 3. Let buffer be a byte sequence consisting of gb18030
// second, gb18030 third, and byte, in order.
var buffer = [gb18030_second, gb18030_third, bite];
// 4. Set gb18030 first, gb18030 second, and gb18030 third to
// 0x00.
gb18030_first = 0x00;
gb18030_second = 0x00;
gb18030_third = 0x00;
// 5. If code point is null, prepend buffer to stream and
// return error.
if (code_point === null) {
stream.prepend(buffer);
return decoderError(fatal);
}
// 6. Return a code point whose value is code point.
return code_point;
}
// 4. If gb18030 second is not 0x00, run these substeps:
if (gb18030_second !== 0x00) {
// 1. If byte is in the range 0x81 to 0xFE, inclusive, set
// gb18030 third to byte and return continue.
if (inRange(bite, 0x81, 0xfe)) {
gb18030_third = bite;
return null;
}
// 2. Prepend gb18030 second followed by byte to stream, set
// gb18030 first and gb18030 second to 0x00, and return error.
stream.prepend([gb18030_second, bite]);
gb18030_first = 0x00;
gb18030_second = 0x00;
return decoderError(fatal);
}
// 5. If gb18030 first is not 0x00, run these substeps:
if (gb18030_first !== 0x00) {
// 1. If byte is in the range 0x30 to 0x39, inclusive, set
// gb18030 second to byte and return continue.
if (inRange(bite, 0x30, 0x39)) {
gb18030_second = bite;
return null;
}
// 2. Let lead be gb18030 first, let pointer be null, and set
// gb18030 first to 0x00.
var lead = gb18030_first;
var pointer = null;
gb18030_first = 0x00;
// 3. Let offset be 0x40 if byte is less than 0x7F and 0x41
// otherwise.
var offset = bite < 0x7f ? 0x40 : 0x41;
// 4. If byte is in the range 0x40 to 0x7E, inclusive, or 0x80
// to 0xFE, inclusive, set pointer to (lead − 0x81) × 190 +
// (byte − offset).
if (inRange(bite, 0x40, 0x7e) || inRange(bite, 0x80, 0xfe))
pointer = (lead - 0x81) * 190 + (bite - offset);
// 5. Let code point be null if pointer is null and the index
// code point for pointer in index gb18030 otherwise.
code_point = pointer === null ? null : indexCodePointFor(pointer, index('gb18030'));
// 6. If code point is null and byte is an ASCII byte, prepend
// byte to stream.
if (code_point === null && isASCIIByte(bite)) stream.prepend(bite);
// 7. If code point is null, return error.
if (code_point === null) return decoderError(fatal);
// 8. Return a code point whose value is code point.
return code_point;
}
// 6. If byte is an ASCII byte, return a code point whose value
// is byte.
if (isASCIIByte(bite)) return bite;
// 7. If byte is 0x80, return code point U+20AC.
if (bite === 0x80) return 0x20ac;
// 8. If byte is in the range 0x81 to 0xFE, inclusive, set
// gb18030 first to byte and return continue.
if (inRange(bite, 0x81, 0xfe)) {
gb18030_first = bite;
return null;
}
// 9. Return error.
return decoderError(fatal);
};
}
// 11.2.2 gb18030 encoder
/**
* @constructor
* @implements {Encoder}
* @param {{fatal: boolean}} options
* @param {boolean=} gbk_flag
*/
function GB18030Encoder(options, gbk_flag) {
var fatal = options.fatal;
// gb18030's decoder has an associated gbk flag (initially unset).
/**
* @param {Stream} stream Input stream.
* @param {number} code_point Next code point read from the stream.
* @return {(number|!Array.<number>)} Byte(s) to emit.
*/
this.handler = function (stream, code_point) {
// 1. If code point is end-of-stream, return finished.
if (code_point === end_of_stream) return finished;
// 2. If code point is an ASCII code point, return a byte whose
// value is code point.
if (isASCIICodePoint(code_point)) return code_point;
// 3. If code point is U+E5E5, return error with code point.
if (code_point === 0xe5e5) return encoderError(code_point);
// 4. If the gbk flag is set and code point is U+20AC, return
// byte 0x80.
if (gbk_flag && code_point === 0x20ac) return 0x80;
// 5. Let pointer be the index pointer for code point in index
// gb18030.
var pointer = indexPointerFor(code_point, index('gb18030'));
// 6. If pointer is not null, run these substeps:
if (pointer !== null) {
// 1. Let lead be floor(pointer / 190) + 0x81.
var lead = floor(pointer / 190) + 0x81;
// 2. Let trail be pointer % 190.
var trail = pointer % 190;
// 3. Let offset be 0x40 if trail is less than 0x3F and 0x41 otherwise.
var offset = trail < 0x3f ? 0x40 : 0x41;
// 4. Return two bytes whose values are lead and trail + offset.
return [lead, trail + offset];
}
// 7. If gbk flag is set, return error with code point.
if (gbk_flag) return encoderError(code_point);
// 8. Set pointer to the index gb18030 ranges pointer for code
// point.
pointer = indexGB18030RangesPointerFor(code_point);
// 9. Let byte1 be floor(pointer / 10 / 126 / 10).
var byte1 = floor(pointer / 10 / 126 / 10);
// 10. Set pointer to pointer − byte1 × 10 × 126 × 10.
pointer = pointer - byte1 * 10 * 126 * 10;
// 11. Let byte2 be floor(pointer / 10 / 126).
var byte2 = floor(pointer / 10 / 126);
// 12. Set pointer