UNPKG

@zxing/text-encoding

Version:

Polyfill for the Encoding Living Standard's API.

github.com/inexorabletash/text-encoding

zxing-js/text-encoding

1,256 lines (1,240 loc) • 125 kB

JavaScript

(function (global, factory) { typeof exports === 'object' && typeof module !== 'undefined' ? factory(exports) : typeof define === 'function' && define.amd ? define(['exports'], factory) : (global = global || self, factory(global.TextEncoding = {})); }(this, (function (exports) { 'use strict'; /** @const */ var DEFAULT_ENCODING = 'utf-8'; /** * @param {boolean} fatal If true, decoding errors raise an exception. * @param {number=} opt_code_point Override the standard fallback code point. * @return {number} The code point to insert on a decoding error. */ function decoderError(fatal, opt_code_point) { if (opt_code_point === void 0) { opt_code_point = undefined; } if (fatal) throw TypeError("Decoder error"); return opt_code_point || 0xfffd; } /** * @param {number} code_point The code point that could not be encoded. * @return {number} Always throws, no value is actually returned. */ function encoderError(code_point) { throw TypeError("The code point " + code_point + " could not be encoded."); } // 5.2 Names and labels // TODO: Define @typedef for Encoding: {name:string,labels:Array.<string>} // https://github.com/google/closure-compiler/issues/247 /** * @param {string} label The encoding label. * @return {?{name:string,labels:Array.<string>}} */ function getEncoding(label) { // 1. Remove any leading and trailing ASCII whitespace from label. var keyLabel = String(label).trim().toLowerCase(); // 2. If label is an ASCII case-insensitive match for any of the // labels listed in the table below, return the corresponding // encoding, and failure otherwise. 
if (keyLabel in label_to_encoding) { return label_to_encoding[keyLabel]; } return null; } /** * Encodings table: https://encoding.spec.whatwg.org/encodings.json * @const * @type {!Array.<{ * heading: string, * encodings: Array.<{name:string,labels:Array.<string>}> * }>} */ var encodings = [ { encodings: [ { labels: ["unicode-1-1-utf-8", "utf-8", "utf8"], name: "UTF-8", }, ], heading: "The Encoding", }, { encodings: [ { labels: ["866", "cp866", "csibm866", "ibm866"], name: "IBM866", }, { labels: [ "csisolatin2", "iso-8859-2", "iso-ir-101", "iso8859-2", "iso88592", "iso_8859-2", "iso_8859-2:1987", "l2", "latin2", ], name: "ISO-8859-2", }, { labels: [ "csisolatin3", "iso-8859-3", "iso-ir-109", "iso8859-3", "iso88593", "iso_8859-3", "iso_8859-3:1988", "l3", "latin3", ], name: "ISO-8859-3", }, { labels: [ "csisolatin4", "iso-8859-4", "iso-ir-110", "iso8859-4", "iso88594", "iso_8859-4", "iso_8859-4:1988", "l4", "latin4", ], name: "ISO-8859-4", }, { labels: [ "csisolatincyrillic", "cyrillic", "iso-8859-5", "iso-ir-144", "iso8859-5", "iso88595", "iso_8859-5", "iso_8859-5:1988", ], name: "ISO-8859-5", }, { labels: [ "arabic", "asmo-708", "csiso88596e", "csiso88596i", "csisolatinarabic", "ecma-114", "iso-8859-6", "iso-8859-6-e", "iso-8859-6-i", "iso-ir-127", "iso8859-6", "iso88596", "iso_8859-6", "iso_8859-6:1987", ], name: "ISO-8859-6", }, { labels: [ "csisolatingreek", "ecma-118", "elot_928", "greek", "greek8", "iso-8859-7", "iso-ir-126", "iso8859-7", "iso88597", "iso_8859-7", "iso_8859-7:1987", "sun_eu_greek", ], name: "ISO-8859-7", }, { labels: [ "csiso88598e", "csisolatinhebrew", "hebrew", "iso-8859-8", "iso-8859-8-e", "iso-ir-138", "iso8859-8", "iso88598", "iso_8859-8", "iso_8859-8:1988", "visual", ], name: "ISO-8859-8", }, { labels: ["csiso88598i", "iso-8859-8-i", "logical"], name: "ISO-8859-8-I", }, { labels: [ "csisolatin6", "iso-8859-10", "iso-ir-157", "iso8859-10", "iso885910", "l6", "latin6", ], name: "ISO-8859-10", }, { labels: ["iso-8859-13", "iso8859-13", 
"iso885913"], name: "ISO-8859-13", }, { labels: ["iso-8859-14", "iso8859-14", "iso885914"], name: "ISO-8859-14", }, { labels: [ "csisolatin9", "iso-8859-15", "iso8859-15", "iso885915", "iso_8859-15", "l9", ], name: "ISO-8859-15", }, { labels: ["iso-8859-16"], name: "ISO-8859-16", }, { labels: ["cskoi8r", "koi", "koi8", "koi8-r", "koi8_r"], name: "KOI8-R", }, { labels: ["koi8-ru", "koi8-u"], name: "KOI8-U", }, { labels: ["csmacintosh", "mac", "macintosh", "x-mac-roman"], name: "macintosh", }, { labels: [ "dos-874", "iso-8859-11", "iso8859-11", "iso885911", "tis-620", "windows-874", ], name: "windows-874", }, { labels: ["cp1250", "windows-1250", "x-cp1250"], name: "windows-1250", }, { labels: ["cp1251", "windows-1251", "x-cp1251"], name: "windows-1251", }, { labels: [ "ansi_x3.4-1968", "cp1252", "cp819", "ibm819", "iso-ir-100", "windows-1252", "x-cp1252", ], name: "windows-1252", }, { labels: [ "ascii", "us-ascii", "iso-8859-1", "iso8859-1", "iso88591", "iso_8859-1", "iso_8859-1:1987", "l1", "latin1", "csisolatin1", ], name: "iso-8859-1", }, { labels: ["cp1253", "windows-1253", "x-cp1253"], name: "windows-1253", }, { labels: [ "cp1254", "csisolatin5", "iso-8859-9", "iso-ir-148", "iso8859-9", "iso88599", "iso_8859-9", "iso_8859-9:1989", "l5", "latin5", "windows-1254", "x-cp1254", ], name: "windows-1254", }, { labels: ["cp1255", "windows-1255", "x-cp1255"], name: "windows-1255", }, { labels: ["cp1256", "windows-1256", "x-cp1256"], name: "windows-1256", }, { labels: ["cp1257", "windows-1257", "x-cp1257"], name: "windows-1257", }, { labels: ["cp1258", "windows-1258", "x-cp1258"], name: "windows-1258", }, { labels: ["x-mac-cyrillic", "x-mac-ukrainian"], name: "x-mac-cyrillic", }, ], heading: "Legacy single-byte encodings", }, { encodings: [ { labels: [ "chinese", "csgb2312", "csiso58gb231280", "gb2312", "gb_2312", "gb_2312-80", "gbk", "iso-ir-58", "x-gbk", ], name: "GBK", }, { labels: ["gb18030"], name: "gb18030", }, ], heading: "Legacy multi-byte Chinese (simplified) 
encodings", }, { encodings: [ { labels: ["big5", "big5-hkscs", "cn-big5", "csbig5", "x-x-big5"], name: "Big5", }, ], heading: "Legacy multi-byte Chinese (traditional) encodings", }, { encodings: [ { labels: ["cseucpkdfmtjapanese", "euc-jp", "x-euc-jp"], name: "EUC-JP", }, { labels: ["csiso2022jp", "iso-2022-jp"], name: "ISO-2022-JP", }, { labels: [ "csshiftjis", "ms932", "ms_kanji", "shift-jis", "shift_jis", "sjis", "windows-31j", "x-sjis", ], name: "Shift_JIS", }, ], heading: "Legacy multi-byte Japanese encodings", }, { encodings: [ { labels: [ "cseuckr", "csksc56011987", "euc-kr", "iso-ir-149", "korean", "ks_c_5601-1987", "ks_c_5601-1989", "ksc5601", "ksc_5601", "windows-949", ], name: "EUC-KR", }, ], heading: "Legacy multi-byte Korean encodings", }, { encodings: [ { labels: [ "csiso2022kr", "hz-gb-2312", "iso-2022-cn", "iso-2022-cn-ext", "iso-2022-kr", ], name: "replacement", }, { labels: ["utf-16be"], name: "UTF-16BE", }, { labels: ["utf-16", "utf-16le"], name: "UTF-16LE", }, { labels: ["x-user-defined"], name: "x-user-defined", }, ], heading: "Legacy miscellaneous encodings", }, ]; // Label to encoding registry. /** @type {Object.<string,{name:string,labels:Array.<string>}>} */ var label_to_encoding = {}; encodings.forEach(function (category) { category.encodings.forEach(function (encoding) { encoding.labels.forEach(function (label) { label_to_encoding[label] = encoding; }); }); }); // 5.1 Encoders and decoders /** @const */ var finished = -1; function getArrayVal(idxVal) { return Array.isArray(idxVal) ? idxVal : [idxVal]; } /** * @param {number} a The number to test. * @param {number} min The minimum value in the range, inclusive. * @param {number} max The maximum value in the range, inclusive. * @return {boolean} True if a >= min and a <= max. */ function inRange(a, min, max) { return min <= a && a <= max; } /** * @param {!Array.<*>} array The array to check. * @param {*} item The item to look for in the array. 
* @return {boolean} True if the item appears in the array. */ function includes(array, item) { return array.indexOf(item) !== -1; } /** * @param {*} o * @return {Object} */ function ToDictionary(o) { if (o === undefined || o === null) return {}; if (o === Object(o)) return o; throw TypeError('Could not convert argument to dictionary'); } /** * @param {string} string Input string of UTF-16 code units. * @return {!Array.<number>} Code points. */ function stringToCodePoints(string) { // https://heycam.github.io/webidl/#dfn-obtain-unicode // 1. Let S be the DOMString value. var s = String(string); // 2. Let n be the length of S. var n = s.length; // 3. Initialize i to 0. var i = 0; // 4. Initialize U to be an empty sequence of Unicode characters. var u = []; // 5. While i < n: while (i < n) { // 1. Let c be the code unit in S at index i. var c = s.charCodeAt(i); // 2. Depending on the value of c: // c < 0xD800 or c > 0xDFFF if (c < 0xD800 || c > 0xDFFF) { // Append to U the Unicode character with code point c. u.push(c); } // 0xDC00 ≤ c ≤ 0xDFFF else if (0xDC00 <= c && c <= 0xDFFF) { // Append to U a U+FFFD REPLACEMENT CHARACTER. u.push(0xFFFD); } // 0xD800 ≤ c ≤ 0xDBFF else if (0xD800 <= c && c <= 0xDBFF) { // 1. If i = n−1, then append to U a U+FFFD REPLACEMENT // CHARACTER. if (i === n - 1) { u.push(0xFFFD); } // 2. Otherwise, i < n−1: else { // 1. Let d be the code unit in S at index i+1. var d = s.charCodeAt(i + 1); // 2. If 0xDC00 ≤ d ≤ 0xDFFF, then: if (0xDC00 <= d && d <= 0xDFFF) { // 1. Let a be c & 0x3FF. var a = c & 0x3FF; // 2. Let b be d & 0x3FF. var b = d & 0x3FF; // 3. Append to U the Unicode character with code point // 2^16+2^10*a+b. u.push(0x10000 + (a << 10) + b); // 4. Set i to i+1. i += 1; } // 3. Otherwise, d < 0xDC00 or d > 0xDFFF. Append to U a // U+FFFD REPLACEMENT CHARACTER. else { u.push(0xFFFD); } } } // 3. Set i to i+1. i += 1; } // 6. Return U. return u; } /** * @param {!Array.<number>} code_points Array of code points. 
* @return {string} string String of UTF-16 code units. */ function codePointsToString(code_points) { var s = ''; for (var i = 0; i < code_points.length; ++i) { var cp = code_points[i]; if (cp <= 0xFFFF) { s += String.fromCharCode(cp); } else { cp -= 0x10000; s += String.fromCharCode((cp >> 10) + 0xD800, (cp & 0x3FF) + 0xDC00); } } return s; } function getGlobalScope() { if (typeof global !== 'undefined') return global; if (typeof window !== 'undefined') return window; if (typeof self !== 'undefined') return self; return; } var _encodingIndexes; function checkForEncodingIndexes() { if (typeof TextEncodingIndexes !== 'undefined') return TextEncodingIndexes.encodingIndexes; var glo = getGlobalScope(); if (!glo) return null; if ('TextEncodingIndexes' in glo) return global['TextEncodingIndexes']['encodingIndexes']; if ('encoding-indexes' in glo) return global['encodingIndexes']; return null; } function getEncodingIndexes() { if (_encodingIndexes) { return _encodingIndexes; } var indexes = checkForEncodingIndexes(); if (!indexes) { return null; } _encodingIndexes = indexes; return indexes; } /** * @param {number} pointer The |pointer| to search for. * @param {(!Array.<?number>|undefined)} index The |index| to search within. * @return {?number} The code point corresponding to |pointer| in |index|, * or null if |code point| is not in |index|. */ function indexCodePointFor(pointer, index) { if (!index) return null; return index[pointer] || null; } /** * @param {number} code_point The |code point| to search for. * @param {!Array.<?number>} index The |index| to search within. * @return {?number} The first pointer corresponding to |code point| in * |index|, or null if |code point| is not in |index|. */ function indexPointerFor(code_point, index) { var pointer = index.indexOf(code_point); return pointer === -1 ? null : pointer; } /** * @param {string} name Name of the index. 
* @return {(!Array.<number>|!Array.<Array.<number>>)} * */ function index(name) { var encodingIndexes = getEncodingIndexes(); if (!encodingIndexes) { throw Error("Indexes missing." + " Did you forget to include encoding-indexes.js first?"); } return encodingIndexes[name]; } /** * @param {number} pointer The |pointer| to search for in the gb18030 index. * @return {?number} The code point corresponding to |pointer| in |index|, * or null if |code point| is not in the gb18030 index. */ function indexGB18030RangesCodePointFor(pointer) { // 1. If pointer is greater than 39419 and less than 189000, or // pointer is greater than 1237575, return null. if ((pointer > 39419 && pointer < 189000) || (pointer > 1237575)) return null; // 2. If pointer is 7457, return code point U+E7C7. if (pointer === 7457) return 0xE7C7; // 3. Let offset be the last pointer in index gb18030 ranges that // is equal to or less than pointer and let code point offset be // its corresponding code point. var offset = 0; var code_point_offset = 0; var idx = index('gb18030-ranges'); for (var i = 0; i < idx.length; ++i) { /** @type {!Array.<number>} */ var entry = getArrayVal(idx[i]); if (entry[0] <= pointer) { offset = entry[0]; code_point_offset = entry[1]; } else { break; } } // 4. Return a code point whose value is code point offset + // pointer − offset. return code_point_offset + pointer - offset; } /** * @param {number} code_point The |code point| to locate in the gb18030 index. * @return {number} The first pointer corresponding to |code point| in the * gb18030 index. */ function indexGB18030RangesPointerFor(code_point) { // 1. If code point is U+E7C7, return pointer 7457. if (code_point === 0xE7C7) return 7457; // 2. Let offset be the last code point in index gb18030 ranges // that is equal to or less than code point and let pointer offset // be its corresponding pointer. 
var offset = 0; var pointer_offset = 0; var idx = index('gb18030-ranges'); for (var i = 0; i < idx.length; ++i) { var idxVal = idx[i]; /** @type {!Array.<number>} */ var entry = getArrayVal(idxVal); if (entry[1] <= code_point) { offset = entry[1]; pointer_offset = entry[0]; } else { break; } } // 3. Return a pointer whose value is pointer offset + code point // − offset. return pointer_offset + code_point - offset; } /** * @param {number} code_point The |code_point| to search for in the Shift_JIS * index. * @return {?number} The code point corresponding to |pointer| in |index|, * or null if |code point| is not in the Shift_JIS index. */ function indexShiftJISPointerFor(code_point) { // 1. Let index be index jis0208 excluding all entries whose // pointer is in the range 8272 to 8835, inclusive. shift_jis_index = shift_jis_index || index('jis0208').map(function (code_point, pointer) { return inRange(pointer, 8272, 8835) ? null : code_point; }); var index_ = shift_jis_index; // 2. Return the index pointer for code point in index. return index_.indexOf(code_point); } var shift_jis_index; /** * @param {number} code_point The |code_point| to search for in the big5 * index. * @return {?number} The code point corresponding to |pointer| in |index|, * or null if |code point| is not in the big5 index. */ function indexBig5PointerFor(code_point) { // 1. Let index be index Big5 excluding all entries whose pointer big5_index_no_hkscs = big5_index_no_hkscs || index('big5').map(function (code_point, pointer) { return (pointer < (0xA1 - 0x81) * 157) ? null : code_point; }); var index_ = big5_index_no_hkscs; // 2. If code point is U+2550, U+255E, U+2561, U+256A, U+5341, or // U+5345, return the last pointer corresponding to code point in // index. if (code_point === 0x2550 || code_point === 0x255E || code_point === 0x2561 || code_point === 0x256A || code_point === 0x5341 || code_point === 0x5345) { return index_.lastIndexOf(code_point); } // 3. 
Return the index pointer for code point in index. return indexPointerFor(code_point, index_); } var big5_index_no_hkscs; // // Implementation of Encoding specification // https://encoding.spec.whatwg.org/ // // // 4. Terminology // /** * An ASCII byte is a byte in the range 0x00 to 0x7F, inclusive. * @param {number} a The number to test. * @return {boolean} True if a is in the range 0x00 to 0x7F, inclusive. */ function isASCIIByte(a) { return 0x00 <= a && a <= 0x7F; } /** * An ASCII code point is a code point in the range U+0000 to * U+007F, inclusive. */ var isASCIICodePoint = isASCIIByte; /** * End-of-stream is a special token that signifies no more tokens * are in the stream. * @const */ var end_of_stream = -1; /** * @constructor * @implements {Decoder} * @param {{fatal: boolean}} options */ var Big5Decoder = /** @class */ (function () { function Big5Decoder(options) { this.fatal = options.fatal; // Big5's decoder has an associated Big5 lead (initially 0x00). /** @type {number} */ this.Big5_lead = 0x00; } /** * @param {Stream} stream The stream of bytes being decoded. * @param {number} bite The next byte read from the stream. * @return {?(number|!Array.<number>)} The next code point(s) * decoded, or null if not enough data exists in the input * stream to decode a complete code point. */ Big5Decoder.prototype.handler = function (stream, bite) { // 1. If byte is end-of-stream and Big5 lead is not 0x00, set // Big5 lead to 0x00 and return error. if (bite === end_of_stream && this.Big5_lead !== 0x00) { this.Big5_lead = 0x00; return decoderError(this.fatal); } // 2. If byte is end-of-stream and Big5 lead is 0x00, return // finished. if (bite === end_of_stream && this.Big5_lead === 0x00) return finished; // 3. If Big5 lead is not 0x00, let lead be Big5 lead, let // pointer be null, set Big5 lead to 0x00, and then run these // substeps: if (this.Big5_lead !== 0x00) { var lead = this.Big5_lead; var pointer = null; this.Big5_lead = 0x00; // 1. 
Let offset be 0x40 if byte is less than 0x7F and 0x62 // otherwise. var offset = bite < 0x7F ? 0x40 : 0x62; // 2. If byte is in the range 0x40 to 0x7E, inclusive, or 0xA1 // to 0xFE, inclusive, set pointer to (lead − 0x81) × 157 + // (byte − offset). if (inRange(bite, 0x40, 0x7E) || inRange(bite, 0xA1, 0xFE)) pointer = (lead - 0x81) * 157 + (bite - offset); // 3. If there is a row in the table below whose first column // is pointer, return the two code points listed in its second // column // Pointer | Code points // --------+-------------- // 1133 | U+00CA U+0304 // 1135 | U+00CA U+030C // 1164 | U+00EA U+0304 // 1166 | U+00EA U+030C switch (pointer) { case 1133: return [0x00CA, 0x0304]; case 1135: return [0x00CA, 0x030C]; case 1164: return [0x00EA, 0x0304]; case 1166: return [0x00EA, 0x030C]; } // 4. Let code point be null if pointer is null and the index // code point for pointer in index Big5 otherwise. var code_point = (pointer === null) ? null : indexCodePointFor(pointer, index('big5')); // 5. If code point is null and byte is an ASCII byte, prepend // byte to stream. if (code_point === null && isASCIIByte(bite)) stream.prepend(bite); // 6. If code point is null, return error. if (code_point === null) return decoderError(this.fatal); // 7. Return a code point whose value is code point. return code_point; } // 4. If byte is an ASCII byte, return a code point whose value // is byte. if (isASCIIByte(bite)) return bite; // 5. If byte is in the range 0x81 to 0xFE, inclusive, set Big5 // lead to byte and return continue. if (inRange(bite, 0x81, 0xFE)) { this.Big5_lead = bite; return null; } // 6. Return error. return decoderError(this.fatal); }; return Big5Decoder; }()); /** * @constructor * @implements {Encoder} * @param {{fatal: boolean}} options */ var Big5Encoder = /** @class */ (function () { function Big5Encoder(options) { this.fatal = options.fatal; } /** * @param {Stream} stream Input stream. 
* @param {number} code_point Next code point read from the stream.
         * @return {(number|!Array.<number>)} Byte(s) to emit.
         */
        Big5Encoder.prototype.handler = function (stream, code_point) {
            // 1. If code point is end-of-stream, return finished.
            if (code_point === end_of_stream)
                return finished;
            // 2. If code point is an ASCII code point, return a byte whose
            // value is code point.
            if (isASCIICodePoint(code_point))
                return code_point;
            // 3. Let pointer be the index Big5 pointer for code point.
            var pointer = indexBig5PointerFor(code_point);
            // 4. If pointer is null, return error with code point.
            if (pointer === null)
                return encoderError(code_point);
            // 5. Let lead be Math.floor(pointer / 157) + 0x81.
            var lead = Math.floor(pointer / 157) + 0x81;
            // 6. If lead is less than 0xA1, return error with code point.
            if (lead < 0xA1)
                return encoderError(code_point);
            // 7. Let trail be pointer % 157.
            var trail = pointer % 157;
            // 8. Let offset be 0x40 if trail is less than 0x3F and 0x62
            // otherwise.
            var offset = trail < 0x3F ? 0x40 : 0x62;
            // Return two bytes whose values are lead and trail + offset.
            return [lead, trail + offset];
        };
        return Big5Encoder;
    }());

    /**
     * Decoder for EUC-JP. It keeps the pending lead byte and a flag
     * selecting the jis0212 index between calls to handler().
     * @constructor
     * @implements {Decoder}
     * @param {{fatal: boolean}} options
     */
    var EUCJPDecoder = /** @class */ (function () {
        function EUCJPDecoder(options) {
            this.fatal = options.fatal;
            // euc-jp's decoder has an associated euc-jp jis0212 flag
            // (initially unset) and euc-jp lead (initially 0x00).
            // Note: the two initialisations below are joined by a comma
            // operator, so they form a single expression statement.
            /** @type {boolean} */ this.eucjp_jis0212_flag = false,
                /** @type {number} */ this.eucjp_lead = 0x00;
        }
        /**
         * @param {Stream} stream The stream of bytes being decoded.
         * @param {number} bite The next byte read from the stream.
         * @return {?(number|!Array.<number>)} The next code point(s)
         *     decoded, or null if not enough data exists in the input
         *     stream to decode a complete code point.
         */
        EUCJPDecoder.prototype.handler = function (stream, bite) {
            // 1. If byte is end-of-stream and euc-jp lead is not 0x00, set
            // euc-jp lead to 0x00, and return error.
            if (bite === end_of_stream && this.eucjp_lead !== 0x00) {
                this.eucjp_lead = 0x00;
                return decoderError(this.fatal);
            }
            // 2. If byte is end-of-stream and euc-jp lead is 0x00, return
            // finished.
            if (bite === end_of_stream && this.eucjp_lead === 0x00)
                return finished;
            // 3. If euc-jp lead is 0x8E and byte is in the range 0xA1 to
            // 0xDF, inclusive, set euc-jp lead to 0x00 and return a code
            // point whose value is 0xFF61 − 0xA1 + byte.
            if (this.eucjp_lead === 0x8E && inRange(bite, 0xA1, 0xDF)) {
                this.eucjp_lead = 0x00;
                return 0xFF61 - 0xA1 + bite;
            }
            // 4. If euc-jp lead is 0x8F and byte is in the range 0xA1 to
            // 0xFE, inclusive, set the euc-jp jis0212 flag, set euc-jp lead
            // to byte, and return continue.
            if (this.eucjp_lead === 0x8F && inRange(bite, 0xA1, 0xFE)) {
                this.eucjp_jis0212_flag = true;
                this.eucjp_lead = bite;
                return null;
            }
            // 5. If euc-jp lead is not 0x00, let lead be euc-jp lead, set
            // euc-jp lead to 0x00, and run these substeps:
            if (this.eucjp_lead !== 0x00) {
                var lead = this.eucjp_lead;
                this.eucjp_lead = 0x00;
                // 1. Let code point be null.
                var code_point = null;
                // 2. If lead and byte are both in the range 0xA1 to 0xFE,
                // inclusive, set code point to the index code point for (lead
                // − 0xA1) × 94 + byte − 0xA1 in index jis0208 if the euc-jp
                // jis0212 flag is unset and in index jis0212 otherwise.
                if (inRange(lead, 0xA1, 0xFE) && inRange(bite, 0xA1, 0xFE)) {
                    code_point = indexCodePointFor((lead - 0xA1) * 94 + (bite - 0xA1), index(!this.eucjp_jis0212_flag ? 'jis0208' : 'jis0212'));
                }
                // 3. Unset the euc-jp jis0212 flag.
                this.eucjp_jis0212_flag = false;
                // 4. If byte is not in the range 0xA1 to 0xFE, inclusive,
                // prepend byte to stream.
                if (!inRange(bite, 0xA1, 0xFE))
                    stream.prepend(bite);
                // 5. If code point is null, return error.
                if (code_point === null)
                    return decoderError(this.fatal);
                // 6. Return a code point whose value is code point.
return code_point;
            }
            // 6. If byte is an ASCII byte, return a code point whose value
            // is byte.
            if (isASCIIByte(bite))
                return bite;
            // 7. If byte is 0x8E, 0x8F, or in the range 0xA1 to 0xFE,
            // inclusive, set euc-jp lead to byte and return continue.
            if (bite === 0x8E || bite === 0x8F || inRange(bite, 0xA1, 0xFE)) {
                this.eucjp_lead = bite;
                return null;
            }
            // 8. Return error.
            return decoderError(this.fatal);
        };
        return EUCJPDecoder;
    }());

    /**
     * Encoder for EUC-JP. It holds no state beyond the `fatal` option.
     * @constructor
     * @implements {Encoder}
     * @param {{fatal: boolean}} options
     */
    var EUCJPEncoder = /** @class */ (function () {
        function EUCJPEncoder(options) {
            this.fatal = options.fatal;
        }
        /**
         * @param {Stream} stream Input stream.
         * @param {number} code_point Next code point read from the stream.
         * @return {(number|!Array.<number>)} Byte(s) to emit.
         */
        EUCJPEncoder.prototype.handler = function (stream, code_point) {
            // 1. If code point is end-of-stream, return finished.
            if (code_point === end_of_stream)
                return finished;
            // 2. If code point is an ASCII code point, return a byte whose
            // value is code point.
            if (isASCIICodePoint(code_point))
                return code_point;
            // 3. If code point is U+00A5, return byte 0x5C.
            if (code_point === 0x00A5)
                return 0x5C;
            // 4. If code point is U+203E, return byte 0x7E.
            if (code_point === 0x203E)
                return 0x7E;
            // 5. If code point is in the range U+FF61 to U+FF9F, inclusive,
            // return two bytes whose values are 0x8E and code point −
            // 0xFF61 + 0xA1.
            if (inRange(code_point, 0xFF61, 0xFF9F))
                return [0x8E, code_point - 0xFF61 + 0xA1];
            // 6. If code point is U+2212, set it to U+FF0D.
            // (The parameter is reassigned so the index lookup below uses
            // the substituted code point.)
            if (code_point === 0x2212)
                code_point = 0xFF0D;
            // 7. Let pointer be the index pointer for code point in index
            // jis0208.
            var pointer = indexPointerFor(code_point, index('jis0208'));
            // 8. If pointer is null, return error with code point.
            if (pointer === null)
                return encoderError(code_point);
            // 9. Let lead be Math.floor(pointer / 94) + 0xA1.
            var lead = Math.floor(pointer / 94) + 0xA1;
            // 10. Let trail be pointer % 94 + 0xA1.
var trail = pointer % 94 + 0xA1; // 11. Return two bytes whose values are lead and trail. return [lead, trail]; }; return EUCJPEncoder; }()); /** * @constructor * @implements {Decoder} * @param {{fatal: boolean}} options */ var EUCKRDecoder = /** @class */ (function () { function EUCKRDecoder(options) { this.fatal = options.fatal; // euc-kr's decoder has an associated euc-kr lead (initially 0x00). /** @type {number} */ this.euckr_lead = 0x00; } /** * @param {Stream} stream The stream of bytes being decoded. * @param {number} bite The next byte read from the stream. * @return {?(number|!Array.<number>)} The next code point(s) * decoded, or null if not enough data exists in the input * stream to decode a complete code point. */ EUCKRDecoder.prototype.handler = function (stream, bite) { // 1. If byte is end-of-stream and euc-kr lead is not 0x00, set // euc-kr lead to 0x00 and return error. if (bite === end_of_stream && this.euckr_lead !== 0) { this.euckr_lead = 0x00; return decoderError(this.fatal); } // 2. If byte is end-of-stream and euc-kr lead is 0x00, return // finished. if (bite === end_of_stream && this.euckr_lead === 0) return finished; // 3. If euc-kr lead is not 0x00, let lead be euc-kr lead, let // pointer be null, set euc-kr lead to 0x00, and then run these // substeps: if (this.euckr_lead !== 0x00) { var lead = this.euckr_lead; var pointer = null; this.euckr_lead = 0x00; // 1. If byte is in the range 0x41 to 0xFE, inclusive, set // pointer to (lead − 0x81) × 190 + (byte − 0x41). if (inRange(bite, 0x41, 0xFE)) pointer = (lead - 0x81) * 190 + (bite - 0x41); // 2. Let code point be null, if pointer is null, and the // index code point for pointer in index euc-kr otherwise. var code_point = (pointer === null) ? null : indexCodePointFor(pointer, index('euc-kr')); // 3. If code point is null and byte is an ASCII byte, prepend // byte to stream. if (pointer === null && isASCIIByte(bite)) stream.prepend(bite); // 4. If code point is null, return error. 
if (code_point === null) return decoderError(this.fatal); // 5. Return a code point whose value is code point. return code_point; } // 4. If byte is an ASCII byte, return a code point whose value // is byte. if (isASCIIByte(bite)) return bite; // 5. If byte is in the range 0x81 to 0xFE, inclusive, set // euc-kr lead to byte and return continue. if (inRange(bite, 0x81, 0xFE)) { this.euckr_lead = bite; return null; } // 6. Return error. return decoderError(this.fatal); }; return EUCKRDecoder; }()); /** * @constructor * @implements {Encoder} * @param {{fatal: boolean}} options */ var EUCKREncoder = /** @class */ (function () { function EUCKREncoder(options) { this.fatal = options.fatal; } /** * @param {Stream} stream Input stream. * @param {number} code_point Next code point read from the stream. * @return {(number|!Array.<number>)} Byte(s) to emit. */ EUCKREncoder.prototype.handler = function (stream, code_point) { // 1. If code point is end-of-stream, return finished. if (code_point === end_of_stream) return finished; // 2. If code point is an ASCII code point, return a byte whose // value is code point. if (isASCIICodePoint(code_point)) return code_point; // 3. Let pointer be the index pointer for code point in index // euc-kr. var pointer = indexPointerFor(code_point, index('euc-kr')); // 4. If pointer is null, return error with code point. if (pointer === null) return encoderError(code_point); // 5. Let lead be Math.floor(pointer / 190) + 0x81. var lead = Math.floor(pointer / 190) + 0x81; // 6. Let trail be pointer % 190 + 0x41. var trail = (pointer % 190) + 0x41; // 7. Return two bytes whose values are lead and trail. 
return [lead, trail]; }; return EUCKREncoder; }()); /** * @constructor * @implements {Decoder} * @param {{fatal: boolean}} options */ var GB18030Decoder = /** @class */ (function () { function GB18030Decoder(options) { this.fatal = options.fatal; // gb18030's decoder has an associated gb18030 first, gb18030 // second, and gb18030 third (all initially 0x00). /** @type {number} */ this.gb18030_first = 0x00, /** @type {number} */ this.gb18030_second = 0x00, /** @type {number} */ this.gb18030_third = 0x00; } /** * @param {Stream} stream The stream of bytes being decoded. * @param {number} bite The next byte read from the stream. * @return {?(number|!Array.<number>)} The next code point(s) * decoded, or null if not enough data exists in the input * stream to decode a complete code point. */ GB18030Decoder.prototype.handler = function (stream, bite) { // 1. If byte is end-of-stream and gb18030 first, gb18030 // second, and gb18030 third are 0x00, return finished. if (bite === end_of_stream && this.gb18030_first === 0x00 && this.gb18030_second === 0x00 && this.gb18030_third === 0x00) { return finished; } // 2. If byte is end-of-stream, and gb18030 first, gb18030 // second, or gb18030 third is not 0x00, set gb18030 first, // gb18030 second, and gb18030 third to 0x00, and return error. if (bite === end_of_stream && (this.gb18030_first !== 0x00 || this.gb18030_second !== 0x00 || this.gb18030_third !== 0x00)) { this.gb18030_first = 0x00; this.gb18030_second = 0x00; this.gb18030_third = 0x00; decoderError(this.fatal); } var code_point; // 3. If gb18030 third is not 0x00, run these substeps: if (this.gb18030_third !== 0x00) { // 1. Let code point be null. code_point = null; // 2. If byte is in the range 0x30 to 0x39, inclusive, set // code point to the index gb18030 ranges code point for // (((gb18030 first − 0x81) × 10 + gb18030 second − 0x30) × // 126 + gb18030 third − 0x81) × 10 + byte − 0x30. 
if (inRange(bite, 0x30, 0x39)) { code_point = indexGB18030RangesCodePointFor((((this.gb18030_first - 0x81) * 10 + this.gb18030_second - 0x30) * 126 + this.gb18030_third - 0x81) * 10 + bite - 0x30); } // 3. Let buffer be a byte sequence consisting of gb18030 // second, gb18030 third, and byte, in order. var buffer = [this.gb18030_second, this.gb18030_third, bite]; // 4. Set gb18030 first, gb18030 second, and gb18030 third to // 0x00. this.gb18030_first = 0x00; this.gb18030_second = 0x00; this.gb18030_third = 0x00; // 5. If code point is null, prepend buffer to stream and // return error. if (code_point === null) { stream.prepend(buffer); return decoderError(this.fatal); } // 6. Return a code point whose value is code point. return code_point; } // 4. If gb18030 second is not 0x00, run these substeps: if (this.gb18030_second !== 0x00) { // 1. If byte is in the range 0x81 to 0xFE, inclusive, set // gb18030 third to byte and return continue. if (inRange(bite, 0x81, 0xFE)) { this.gb18030_third = bite; return null; } // 2. Prepend gb18030 second followed by byte to stream, set // gb18030 first and gb18030 second to 0x00, and return error. stream.prepend([this.gb18030_second, bite]); this.gb18030_first = 0x00; this.gb18030_second = 0x00; return decoderError(this.fatal); } // 5. If gb18030 first is not 0x00, run these substeps: if (this.gb18030_first !== 0x00) { // 1. If byte is in the range 0x30 to 0x39, inclusive, set // gb18030 second to byte and return continue. if (inRange(bite, 0x30, 0x39)) { this.gb18030_second = bite; return null; } // 2. Let lead be gb18030 first, let pointer be null, and set // gb18030 first to 0x00. var lead = this.gb18030_first; var pointer = null; this.gb18030_first = 0x00; // 3. Let offset be 0x40 if byte is less than 0x7F and 0x41 // otherwise. var offset = bite < 0x7F ? 0x40 : 0x41; // 4. If byte is in the range 0x40 to 0x7E, inclusive, or 0x80 // to 0xFE, inclusive, set pointer to (lead − 0x81) × 190 + // (byte − offset). 
if (inRange(bite, 0x40, 0x7E) || inRange(bite, 0x80, 0xFE)) pointer = (lead - 0x81) * 190 + (bite - offset); // 5. Let code point be null if pointer is null and the index // code point for pointer in index gb18030 otherwise. code_point = pointer === null ? null : indexCodePointFor(pointer, index('gb18030')); // 6. If code point is null and byte is an ASCII byte, prepend // byte to stream. if (code_point === null && isASCIIByte(bite)) stream.prepend(bite); // 7. If code point is null, return error. if (code_point === null) return decoderError(this.fatal); // 8. Return a code point whose value is code point. return code_point; } // 6. I