@zxing/text-encoding
Version:
Polyfill for the Encoding Living Standard's API.
1,256 lines (1,240 loc) • 125 kB
JavaScript
(function (global, factory) {
typeof exports === 'object' && typeof module !== 'undefined' ? factory(exports) :
typeof define === 'function' && define.amd ? define(['exports'], factory) :
(global = global || self, factory(global.TextEncoding = {}));
}(this, (function (exports) { 'use strict';
/**
 * Default encoding label ('utf-8').
 * NOTE(review): presumably the fallback when a caller supplies no label; the
 * usages are outside this section — confirm.
 * @const
 */ var DEFAULT_ENCODING = 'utf-8';
/**
 * Reports a decoding failure. In fatal mode it throws; otherwise it yields
 * the code point that should stand in for the malformed input.
 * @param {boolean} fatal If true, decoding errors raise an exception.
 * @param {number=} opt_code_point Override for the fallback code point. Any
 *     falsy value (including 0) selects U+FFFD.
 * @return {number} The code point to insert on a decoding error.
 * @throws {TypeError} When |fatal| is truthy.
 */
function decoderError(fatal, opt_code_point) {
    if (fatal) {
        throw TypeError("Decoder error");
    }
    var replacement = opt_code_point ? opt_code_point : 0xFFFD;
    return replacement;
}
/**
 * Reports an encoding failure by throwing; it never returns normally.
 * @param {number} code_point The code point that could not be encoded.
 * @return {number} Declared for handler return-type compatibility only.
 * @throws {TypeError} Always.
 */
function encoderError(code_point) {
    var message = "The code point " + code_point + " could not be encoded.";
    throw TypeError(message);
}
// 5.2 Names and labels
// TODO: Define @typedef for Encoding: {name:string,labels:Array.<string>}
// https://github.com/google/closure-compiler/issues/247
/**
 * Resolves an encoding label to its registry entry.
 * @param {string} label The encoding label.
 * @return {?{name:string,labels:Array.<string>}} The matching encoding, or
 *     null when the label is not registered.
 */
function getEncoding(label) {
    // 1. Remove any leading and trailing whitespace from label and fold it
    // to lower case so the lookup is case-insensitive.
    var keyLabel = String(label).trim().toLowerCase();
    // 2. If label matches any registered label, return the corresponding
    // encoding, and failure (null) otherwise. Only own properties count:
    // the `in` operator would also match inherited keys such as
    // "constructor" or "__proto__" and hand back a non-encoding object.
    if (Object.prototype.hasOwnProperty.call(label_to_encoding, keyLabel)) {
        return label_to_encoding[keyLabel];
    }
    return null;
}
/**
 * Registry of the supported encodings, grouped by category. Each category
 * carries a `heading` and a list of `encodings`; each encoding pairs its
 * `name` with every label accepted as an alias for it. All labels in this
 * table are written in lower case.
 * Modelled on the Encodings table: https://encoding.spec.whatwg.org/encodings.json
 * NOTE(review): "ascii", "latin1", "iso-8859-1" and related labels map to a
 * separate "iso-8859-1" entry here rather than to "windows-1252" — this looks
 * like a deliberate departure from the WHATWG table; confirm before syncing.
 * @const
 * @type {!Array.<{
 * heading: string,
 * encodings: Array.<{name:string,labels:Array.<string>}>
 * }>}
 */
var encodings = [
{
encodings: [
{
labels: ["unicode-1-1-utf-8", "utf-8", "utf8"],
name: "UTF-8",
},
],
heading: "The Encoding",
},
{
encodings: [
{
labels: ["866", "cp866", "csibm866", "ibm866"],
name: "IBM866",
},
{
labels: [
"csisolatin2",
"iso-8859-2",
"iso-ir-101",
"iso8859-2",
"iso88592",
"iso_8859-2",
"iso_8859-2:1987",
"l2",
"latin2",
],
name: "ISO-8859-2",
},
{
labels: [
"csisolatin3",
"iso-8859-3",
"iso-ir-109",
"iso8859-3",
"iso88593",
"iso_8859-3",
"iso_8859-3:1988",
"l3",
"latin3",
],
name: "ISO-8859-3",
},
{
labels: [
"csisolatin4",
"iso-8859-4",
"iso-ir-110",
"iso8859-4",
"iso88594",
"iso_8859-4",
"iso_8859-4:1988",
"l4",
"latin4",
],
name: "ISO-8859-4",
},
{
labels: [
"csisolatincyrillic",
"cyrillic",
"iso-8859-5",
"iso-ir-144",
"iso8859-5",
"iso88595",
"iso_8859-5",
"iso_8859-5:1988",
],
name: "ISO-8859-5",
},
{
labels: [
"arabic",
"asmo-708",
"csiso88596e",
"csiso88596i",
"csisolatinarabic",
"ecma-114",
"iso-8859-6",
"iso-8859-6-e",
"iso-8859-6-i",
"iso-ir-127",
"iso8859-6",
"iso88596",
"iso_8859-6",
"iso_8859-6:1987",
],
name: "ISO-8859-6",
},
{
labels: [
"csisolatingreek",
"ecma-118",
"elot_928",
"greek",
"greek8",
"iso-8859-7",
"iso-ir-126",
"iso8859-7",
"iso88597",
"iso_8859-7",
"iso_8859-7:1987",
"sun_eu_greek",
],
name: "ISO-8859-7",
},
{
labels: [
"csiso88598e",
"csisolatinhebrew",
"hebrew",
"iso-8859-8",
"iso-8859-8-e",
"iso-ir-138",
"iso8859-8",
"iso88598",
"iso_8859-8",
"iso_8859-8:1988",
"visual",
],
name: "ISO-8859-8",
},
{
labels: ["csiso88598i", "iso-8859-8-i", "logical"],
name: "ISO-8859-8-I",
},
{
labels: [
"csisolatin6",
"iso-8859-10",
"iso-ir-157",
"iso8859-10",
"iso885910",
"l6",
"latin6",
],
name: "ISO-8859-10",
},
{
labels: ["iso-8859-13", "iso8859-13", "iso885913"],
name: "ISO-8859-13",
},
{
labels: ["iso-8859-14", "iso8859-14", "iso885914"],
name: "ISO-8859-14",
},
{
labels: [
"csisolatin9",
"iso-8859-15",
"iso8859-15",
"iso885915",
"iso_8859-15",
"l9",
],
name: "ISO-8859-15",
},
{
labels: ["iso-8859-16"],
name: "ISO-8859-16",
},
{
labels: ["cskoi8r", "koi", "koi8", "koi8-r", "koi8_r"],
name: "KOI8-R",
},
{
labels: ["koi8-ru", "koi8-u"],
name: "KOI8-U",
},
{
labels: ["csmacintosh", "mac", "macintosh", "x-mac-roman"],
name: "macintosh",
},
{
labels: [
"dos-874",
"iso-8859-11",
"iso8859-11",
"iso885911",
"tis-620",
"windows-874",
],
name: "windows-874",
},
{
labels: ["cp1250", "windows-1250", "x-cp1250"],
name: "windows-1250",
},
{
labels: ["cp1251", "windows-1251", "x-cp1251"],
name: "windows-1251",
},
{
labels: [
"ansi_x3.4-1968",
"cp1252",
"cp819",
"ibm819",
"iso-ir-100",
"windows-1252",
"x-cp1252",
],
name: "windows-1252",
},
{
labels: [
"ascii",
"us-ascii",
"iso-8859-1",
"iso8859-1",
"iso88591",
"iso_8859-1",
"iso_8859-1:1987",
"l1",
"latin1",
"csisolatin1",
],
name: "iso-8859-1",
},
{
labels: ["cp1253", "windows-1253", "x-cp1253"],
name: "windows-1253",
},
{
labels: [
"cp1254",
"csisolatin5",
"iso-8859-9",
"iso-ir-148",
"iso8859-9",
"iso88599",
"iso_8859-9",
"iso_8859-9:1989",
"l5",
"latin5",
"windows-1254",
"x-cp1254",
],
name: "windows-1254",
},
{
labels: ["cp1255", "windows-1255", "x-cp1255"],
name: "windows-1255",
},
{
labels: ["cp1256", "windows-1256", "x-cp1256"],
name: "windows-1256",
},
{
labels: ["cp1257", "windows-1257", "x-cp1257"],
name: "windows-1257",
},
{
labels: ["cp1258", "windows-1258", "x-cp1258"],
name: "windows-1258",
},
{
labels: ["x-mac-cyrillic", "x-mac-ukrainian"],
name: "x-mac-cyrillic",
},
],
heading: "Legacy single-byte encodings",
},
{
encodings: [
{
labels: [
"chinese",
"csgb2312",
"csiso58gb231280",
"gb2312",
"gb_2312",
"gb_2312-80",
"gbk",
"iso-ir-58",
"x-gbk",
],
name: "GBK",
},
{
labels: ["gb18030"],
name: "gb18030",
},
],
heading: "Legacy multi-byte Chinese (simplified) encodings",
},
{
encodings: [
{
labels: ["big5", "big5-hkscs", "cn-big5", "csbig5", "x-x-big5"],
name: "Big5",
},
],
heading: "Legacy multi-byte Chinese (traditional) encodings",
},
{
encodings: [
{
labels: ["cseucpkdfmtjapanese", "euc-jp", "x-euc-jp"],
name: "EUC-JP",
},
{
labels: ["csiso2022jp", "iso-2022-jp"],
name: "ISO-2022-JP",
},
{
labels: [
"csshiftjis",
"ms932",
"ms_kanji",
"shift-jis",
"shift_jis",
"sjis",
"windows-31j",
"x-sjis",
],
name: "Shift_JIS",
},
],
heading: "Legacy multi-byte Japanese encodings",
},
{
encodings: [
{
labels: [
"cseuckr",
"csksc56011987",
"euc-kr",
"iso-ir-149",
"korean",
"ks_c_5601-1987",
"ks_c_5601-1989",
"ksc5601",
"ksc_5601",
"windows-949",
],
name: "EUC-KR",
},
],
heading: "Legacy multi-byte Korean encodings",
},
{
encodings: [
{
labels: [
"csiso2022kr",
"hz-gb-2312",
"iso-2022-cn",
"iso-2022-cn-ext",
"iso-2022-kr",
],
name: "replacement",
},
{
labels: ["utf-16be"],
name: "UTF-16BE",
},
{
labels: ["utf-16", "utf-16le"],
name: "UTF-16LE",
},
{
labels: ["x-user-defined"],
name: "x-user-defined",
},
],
heading: "Legacy miscellaneous encodings",
},
];
// Label to encoding registry: every label of every encoding in every
// category becomes a key whose value is that encoding's record.
/** @type {Object.<string,{name:string,labels:Array.<string>}>} */
var label_to_encoding = encodings.reduce(function (registry, category) {
    category.encodings.forEach(function (encoding) {
        encoding.labels.forEach(function (label) {
            registry[label] = encoding;
        });
    });
    return registry;
}, {});
// 5.1 Encoders and decoders
/**
 * Value the encoder/decoder handlers return after receiving end-of-stream
 * when no further output remains.
 * @const
 */
var finished = -1;
/**
 * Normalises an index entry to array form.
 * @param {*} idxVal A value that may or may not already be an array.
 * @return {!Array} |idxVal| itself when it is an array, otherwise a new
 *     one-element array containing it.
 */
function getArrayVal(idxVal) {
    if (Array.isArray(idxVal)) {
        return idxVal;
    }
    return [idxVal];
}
/**
 * Closed-interval membership test.
 * @param {number} a The number to test.
 * @param {number} min The minimum value in the range, inclusive.
 * @param {number} max The maximum value in the range, inclusive.
 * @return {boolean} True if min <= a <= max.
 */
function inRange(a, min, max) {
    var atLeastMin = a >= min;
    return atLeastMin && a <= max;
}
/**
 * Strict-equality membership test built on Array.prototype.indexOf.
 * @param {!Array.<*>} array The array to check.
 * @param {*} item The item to look for in the array.
 * @return {boolean} True if the item appears in the array.
 */
function includes(array, item) {
    var position = array.indexOf(item);
    return position > -1;
}
/**
 * Coerces an options argument to a dictionary object.
 * @param {*} o Value supplied by the caller.
 * @return {Object} A fresh empty object for null/undefined, or |o| itself
 *     when it is already an object (functions included).
 * @throws {TypeError} When |o| is a primitive other than null/undefined.
 */
function ToDictionary(o) {
    var isMissing = o === undefined || o === null;
    if (isMissing) {
        return {};
    }
    // Object(o) wraps primitives in a new object, so identity only holds
    // for values that are already objects.
    if (Object(o) !== o) {
        throw TypeError('Could not convert argument to dictionary');
    }
    return o;
}
/**
 * Converts a string of UTF-16 code units into Unicode code points,
 * following https://heycam.github.io/webidl/#dfn-obtain-unicode. Valid
 * surrogate pairs are combined; every unpaired surrogate becomes U+FFFD.
 * @param {string} string Input string of UTF-16 code units.
 * @return {!Array.<number>} Code points.
 */
function stringToCodePoints(string) {
    var units = String(string);
    var total = units.length;
    var points = [];
    for (var pos = 0; pos < total; pos += 1) {
        var unit = units.charCodeAt(pos);
        // Anything outside the surrogate block is its own code point.
        if (unit < 0xD800 || unit > 0xDFFF) {
            points.push(unit);
            continue;
        }
        // A trail surrogate with no preceding lead, or a lead surrogate in
        // the final position, cannot form a pair.
        if (unit >= 0xDC00 || pos === total - 1) {
            points.push(0xFFFD);
            continue;
        }
        // Lead surrogate with at least one unit after it.
        var following = units.charCodeAt(pos + 1);
        if (following >= 0xDC00 && following <= 0xDFFF) {
            // 2^16 + 2^10 * (lead & 0x3FF) + (trail & 0x3FF)
            points.push(0x10000 + ((unit & 0x3FF) << 10) + (following & 0x3FF));
            // Skip the trail unit that was just consumed.
            pos += 1;
        }
        else {
            points.push(0xFFFD);
        }
    }
    return points;
}
/**
 * Builds a string from code points, emitting a surrogate pair for every
 * value above U+FFFF.
 * @param {!Array.<number>} code_points Array of code points.
 * @return {string} string String of UTF-16 code units.
 */
function codePointsToString(code_points) {
    var pieces = [];
    for (var k = 0; k < code_points.length; k += 1) {
        var point = code_points[k];
        if (point <= 0xFFFF) {
            pieces.push(String.fromCharCode(point));
            continue;
        }
        // Offset into the supplementary planes, split into 10-bit halves.
        var offset = point - 0x10000;
        var high = (offset >> 10) + 0xD800;
        var low = (offset & 0x3FF) + 0xDC00;
        pieces.push(String.fromCharCode(high, low));
    }
    return pieces.join('');
}
/**
 * Finds the global object of the current environment.
 * @return {(Object|undefined)} The first of `global`, `window` and `self`
 *     that is defined, or undefined when none is.
 */
function getGlobalScope() {
    return typeof global !== 'undefined' ? global
        : typeof window !== 'undefined' ? window
            : typeof self !== 'undefined' ? self
                : undefined;
}
// Cache for the located encoding indexes; assigned by getEncodingIndexes() on
// its first successful lookup and left undefined until then.
var _encodingIndexes;
/**
 * Locates the encoding index tables, trying in order:
 *   1. a `TextEncodingIndexes` binding visible from this scope,
 *   2. a `TextEncodingIndexes` property on the global scope object,
 *   3. an `encoding-indexes` property on the global scope object.
 * @return {?Object} The indexes object, or null when none is available.
 */
function checkForEncodingIndexes() {
    if (typeof TextEncodingIndexes !== 'undefined')
        return TextEncodingIndexes.encodingIndexes;
    var glo = getGlobalScope();
    if (!glo)
        return null;
    // Read from the scope object that was just probed. The free identifier
    // `global` only exists in Node, so using it here breaks browsers and
    // workers where the scope is `window` or `self`.
    if ('TextEncodingIndexes' in glo)
        return glo['TextEncodingIndexes']['encodingIndexes'];
    // Return the same key that was tested for.
    if ('encoding-indexes' in glo)
        return glo['encoding-indexes'];
    return null;
}
/**
 * Returns the encoding index tables, looking them up on first use and
 * serving the cached object afterwards. A failed lookup is not cached, so
 * the next call retries.
 * @return {?Object} The indexes object, or null when it cannot be found.
 */
function getEncodingIndexes() {
    if (!_encodingIndexes) {
        var located = checkForEncodingIndexes();
        if (!located) {
            return null;
        }
        _encodingIndexes = located;
    }
    return _encodingIndexes;
}
/**
 * Reads the code point stored at |pointer| in |index|.
 * @param {number} pointer The |pointer| to search for.
 * @param {(!Array.<?number>|undefined)} index The |index| to search within.
 * @return {?number} The code point corresponding to |pointer| in |index|,
 *     or null if |index| is missing or holds no truthy entry there.
 */
function indexCodePointFor(pointer, index) {
    return index ? (index[pointer] || null) : null;
}
/**
 * Finds the position of |code_point| in |index|.
 * @param {number} code_point The |code point| to search for.
 * @param {!Array.<?number>} index The |index| to search within.
 * @return {?number} The first pointer corresponding to |code point| in
 *     |index|, or null if |code point| is not in |index|.
 */
function indexPointerFor(code_point, index) {
    var position = index.indexOf(code_point);
    if (position === -1) {
        return null;
    }
    return position;
}
/**
 * Fetches one named table from the encoding indexes.
 * @param {string} name Name of the index.
 * @return {(!Array.<number>|!Array.<Array.<number>>)} The table stored
 *     under |name| (undefined when the indexes have no such table).
 * @throws {Error} When the encoding indexes cannot be located at all.
 */
function index(name) {
    var tables = getEncodingIndexes();
    if (tables) {
        return tables[name];
    }
    throw Error("Indexes missing." +
        " Did you forget to include encoding-indexes.js first?");
}
/**
 * Maps a gb18030 four-byte pointer to a code point using the ranges index.
 * @param {number} pointer The |pointer| to search for in the gb18030 index.
 * @return {?number} The code point corresponding to |pointer|, or null if
 *     the pointer falls outside the ranges the index covers.
 */
function indexGB18030RangesCodePointFor(pointer) {
    // 1. Pointers strictly between 39419 and 189000, or above 1237575,
    // have no mapping.
    var inUnmappedGap = pointer > 39419 && pointer < 189000;
    if (inUnmappedGap || pointer > 1237575) {
        return null;
    }
    // 2. Pointer 7457 is special-cased to U+E7C7.
    if (pointer === 7457) {
        return 0xE7C7;
    }
    // 3. Walk the ranges, remembering the last entry whose starting
    // pointer does not exceed |pointer|; stop at the first that does.
    var ranges = index('gb18030-ranges');
    var rangeStart = 0;
    var rangeCodePoint = 0;
    var k = 0;
    while (k < ranges.length) {
        /** @type {!Array.<number>} */
        var pair = getArrayVal(ranges[k]);
        if (!(pair[0] <= pointer)) {
            break;
        }
        rangeStart = pair[0];
        rangeCodePoint = pair[1];
        k += 1;
    }
    // 4. Offset into the selected range.
    return rangeCodePoint + pointer - rangeStart;
}
/**
 * Maps a code point to its gb18030 four-byte pointer using the ranges index.
 * @param {number} code_point The |code point| to locate in the gb18030 index.
 * @return {number} The first pointer corresponding to |code point| in the
 *     gb18030 index.
 */
function indexGB18030RangesPointerFor(code_point) {
    // 1. U+E7C7 is special-cased to pointer 7457.
    if (code_point === 0xE7C7) {
        return 7457;
    }
    // 2. Walk the ranges, remembering the last entry whose starting code
    // point does not exceed |code_point|; stop at the first that does.
    var ranges = index('gb18030-ranges');
    var rangeCodePoint = 0;
    var rangePointer = 0;
    var k = 0;
    while (k < ranges.length) {
        /** @type {!Array.<number>} */
        var pair = getArrayVal(ranges[k]);
        if (!(pair[1] <= code_point)) {
            break;
        }
        rangeCodePoint = pair[1];
        rangePointer = pair[0];
        k += 1;
    }
    // 3. Offset into the selected range.
    return rangePointer + code_point - rangeCodePoint;
}
/**
 * Finds the Shift_JIS pointer for a code point.
 * @param {number} code_point The |code_point| to search for in the Shift_JIS
 *     index.
 * @return {?number} The first pointer corresponding to |code point| in the
 *     Shift_JIS index, or null if |code point| is not in that index.
 */
function indexShiftJISPointerFor(code_point) {
    // 1. Let index be index jis0208 excluding all entries whose
    // pointer is in the range 8272 to 8835, inclusive. The filtered copy
    // is built on first use and cached in shift_jis_index.
    shift_jis_index = shift_jis_index ||
        index('jis0208').map(function (entry, pointer) {
            return inRange(pointer, 8272, 8835) ? null : entry;
        });
    // 2. Return the index pointer for code point in index. indexOf reports
    // "not found" as -1; translate that to null so callers that test
    // `pointer === null` do not treat -1 as a valid pointer.
    var position = shift_jis_index.indexOf(code_point);
    return position === -1 ? null : position;
}
// Cache of the filtered jis0208 table; undefined until the first call to
// indexShiftJISPointerFor.
var shift_jis_index;
/**
 * Finds the Big5 pointer for a code point, ignoring the low part of the
 * index that the filter below excludes.
 * @param {number} code_point The |code_point| to search for in the big5
 *     index.
 * @return {?number} The pointer corresponding to |code point| in the
 *     filtered big5 index, or null if |code point| is not in it.
 */
function indexBig5PointerFor(code_point) {
    // 1. Let index be index Big5 excluding all entries whose pointer
    // is less than (0xA1 - 0x81) × 157. The filtered copy is built on
    // first use and cached in big5_index_no_hkscs.
    big5_index_no_hkscs = big5_index_no_hkscs ||
        index('big5').map(function (entry, pointer) {
            return (pointer < (0xA1 - 0x81) * 157) ? null : entry;
        });
    var index_ = big5_index_no_hkscs;
    // 2. If code point is U+2550, U+255E, U+2561, U+256A, U+5341, or
    // U+5345, return the last pointer corresponding to code point in
    // index.
    if (code_point === 0x2550 || code_point === 0x255E ||
        code_point === 0x2561 || code_point === 0x256A ||
        code_point === 0x5341 || code_point === 0x5345) {
        // lastIndexOf reports "not found" as -1; map it to null so this
        // branch honours the same contract as step 3.
        var last = index_.lastIndexOf(code_point);
        return last === -1 ? null : last;
    }
    // 3. Return the index pointer for code point in index.
    return indexPointerFor(code_point, index_);
}
// Cache of the filtered big5 table; undefined until the first call to
// indexBig5PointerFor.
var big5_index_no_hkscs;
//
// Implementation of Encoding specification
// https://encoding.spec.whatwg.org/
//
//
// 4. Terminology
//
/**
 * An ASCII byte is a byte in the range 0x00 to 0x7F, inclusive.
 * @param {number} a The number to test.
 * @return {boolean} True if a is in the range 0x00 to 0x7F, inclusive.
 */
function isASCIIByte(a) {
    var nonNegative = a >= 0x00;
    return nonNegative && a <= 0x7F;
}
/**
 * An ASCII code point is a code point in the range U+0000 to
 * U+007F, inclusive. This is the same function object as isASCIIByte,
 * since both tests cover the numeric range 0x00 to 0x7F.
 * @param {number} a The code point to test.
 * @return {boolean} True if a is in the range 0x00 to 0x7F, inclusive.
 */
var isASCIICodePoint = isASCIIByte;
/**
 * End-of-stream is a special token that signifies no more tokens
 * are in the stream. The encoder/decoder handlers compare their input
 * against this value to detect the end of input.
 * @const
 */ var end_of_stream = -1;
/**
 * Streaming Big5 decoder. Holds at most one pending lead byte between calls.
 * @constructor
 * @implements {Decoder}
 * @param {{fatal: boolean}} options
 */
var Big5Decoder = /** @class */ (function () {
    function Big5Decoder(options) {
        this.fatal = options.fatal;
        // Pending lead byte; 0x00 means no lead is waiting for its trail.
        /** @type {number} */
        this.Big5_lead = 0x00;
    }
    /**
     * Consumes one byte (or end-of-stream) and reports what it produced.
     * @param {Stream} stream The stream of bytes being decoded.
     * @param {number} bite The next byte read from the stream.
     * @return {?(number|!Array.<number>)} The next code point(s)
     *     decoded, or null if not enough data exists in the input
     *     stream to decode a complete code point.
     */
    Big5Decoder.prototype.handler = function (stream, bite) {
        var pendingLead = this.Big5_lead;
        // Steps 1-2. End-of-stream: clean finish when nothing is pending,
        // otherwise drop the dangling lead and report an error.
        if (bite === end_of_stream) {
            if (pendingLead === 0x00) {
                return finished;
            }
            this.Big5_lead = 0x00;
            return decoderError(this.fatal);
        }
        // Step 3. A lead is pending, so this byte is its trail.
        if (pendingLead !== 0x00) {
            this.Big5_lead = 0x00;
            // Trail bytes live in 0x40-0x7E and 0xA1-0xFE; the offset
            // closes the gap between the two runs.
            var trailOffset = bite < 0x7F ? 0x40 : 0x62;
            var trailIsValid = inRange(bite, 0x40, 0x7E) || inRange(bite, 0xA1, 0xFE);
            var ptr = trailIsValid
                ? (pendingLead - 0x81) * 157 + (bite - trailOffset)
                : null;
            // Four pointers decode to a base letter plus a combining mark.
            if (ptr === 1133) {
                return [0x00CA, 0x0304];
            }
            if (ptr === 1135) {
                return [0x00CA, 0x030C];
            }
            if (ptr === 1164) {
                return [0x00EA, 0x0304];
            }
            if (ptr === 1166) {
                return [0x00EA, 0x030C];
            }
            var decoded = ptr === null
                ? null
                : indexCodePointFor(ptr, index('big5'));
            if (decoded !== null) {
                return decoded;
            }
            // Failed pair: give an ASCII trail byte back to the stream so
            // it is decoded on its own, then report the error.
            if (isASCIIByte(bite)) {
                stream.prepend(bite);
            }
            return decoderError(this.fatal);
        }
        // Step 4. ASCII passes through unchanged.
        if (isASCIIByte(bite)) {
            return bite;
        }
        // Step 5. A lead byte: remember it and wait for the trail.
        if (inRange(bite, 0x81, 0xFE)) {
            this.Big5_lead = bite;
            return null;
        }
        // Step 6. Anything else is an error.
        return decoderError(this.fatal);
    };
    return Big5Decoder;
}());
/**
 * Big5 encoder. Stateless apart from the stored |fatal| option.
 * @constructor
 * @implements {Encoder}
 * @param {{fatal: boolean}} options
 */
var Big5Encoder = /** @class */ (function () {
    function Big5Encoder(options) {
        this.fatal = options.fatal;
    }
    /**
     * Encodes one code point (or end-of-stream).
     * @param {Stream} stream Input stream.
     * @param {number} code_point Next code point read from the stream.
     * @return {(number|!Array.<number>)} Byte(s) to emit.
     */
    Big5Encoder.prototype.handler = function (stream, code_point) {
        // 1. End-of-stream ends the encoding.
        if (code_point === end_of_stream) {
            return finished;
        }
        // 2. ASCII code points encode as themselves.
        if (isASCIICodePoint(code_point)) {
            return code_point;
        }
        // 3-4. Look the code point up; no pointer means it is unencodable.
        var ptr = indexBig5PointerFor(code_point);
        if (ptr === null) {
            return encoderError(code_point);
        }
        // 5-6. Lead bytes below 0xA1 are rejected.
        var leadByte = Math.floor(ptr / 157) + 0x81;
        if (leadByte < 0xA1) {
            return encoderError(code_point);
        }
        // 7-8. The trail index is split across two byte runs; pick the
        // offset for the run this index falls in.
        var trailIndex = ptr % 157;
        var trailByte = trailIndex + (trailIndex < 0x3F ? 0x40 : 0x62);
        return [leadByte, trailByte];
    };
    return Big5Encoder;
}());
/**
 * Streaming EUC-JP decoder. Tracks a pending lead byte and whether the
 * current sequence selects index jis0212 instead of jis0208.
 * @constructor
 * @implements {Decoder}
 * @param {{fatal: boolean}} options
 */
var EUCJPDecoder = /** @class */ (function () {
    function EUCJPDecoder(options) {
        this.fatal = options.fatal;
        // Set after a 0x8F prefix: the next pair is looked up in jis0212.
        /** @type {boolean} */
        this.eucjp_jis0212_flag = false;
        // Pending lead byte; 0x00 means none.
        /** @type {number} */
        this.eucjp_lead = 0x00;
    }
    /**
     * Consumes one byte (or end-of-stream) and reports what it produced.
     * @param {Stream} stream The stream of bytes being decoded.
     * @param {number} bite The next byte read from the stream.
     * @return {?(number|!Array.<number>)} The next code point(s)
     *     decoded, or null if not enough data exists in the input
     *     stream to decode a complete code point.
     */
    EUCJPDecoder.prototype.handler = function (stream, bite) {
        var pendingLead = this.eucjp_lead;
        // Steps 1-2. End-of-stream: clean finish when nothing is pending,
        // otherwise drop the dangling lead and report an error.
        if (bite === end_of_stream) {
            if (pendingLead === 0x00) {
                return finished;
            }
            this.eucjp_lead = 0x00;
            return decoderError(this.fatal);
        }
        // Step 3. 0x8E followed by 0xA1-0xDF maps linearly from U+FF61.
        if (pendingLead === 0x8E && inRange(bite, 0xA1, 0xDF)) {
            this.eucjp_lead = 0x00;
            return 0xFF61 - 0xA1 + bite;
        }
        // Step 4. 0x8F prefix: switch to jis0212 and treat this byte as
        // the lead of the pair that follows.
        if (pendingLead === 0x8F && inRange(bite, 0xA1, 0xFE)) {
            this.eucjp_jis0212_flag = true;
            this.eucjp_lead = bite;
            return null;
        }
        // Step 5. Any other pending lead: this byte completes the pair.
        if (pendingLead !== 0x00) {
            this.eucjp_lead = 0x00;
            var biteIsTrail = inRange(bite, 0xA1, 0xFE);
            var decoded = null;
            if (inRange(pendingLead, 0xA1, 0xFE) && biteIsTrail) {
                var tableName = this.eucjp_jis0212_flag ? 'jis0212' : 'jis0208';
                decoded = indexCodePointFor((pendingLead - 0xA1) * 94 + (bite - 0xA1), index(tableName));
            }
            // The flag applies to a single pair only.
            this.eucjp_jis0212_flag = false;
            // A byte that cannot be a trail is handed back to the stream.
            if (!biteIsTrail) {
                stream.prepend(bite);
            }
            return decoded === null ? decoderError(this.fatal) : decoded;
        }
        // Step 6. ASCII passes through unchanged.
        if (isASCIIByte(bite)) {
            return bite;
        }
        // Step 7. A prefix or lead byte: remember it and wait for more.
        if (bite === 0x8E || bite === 0x8F || inRange(bite, 0xA1, 0xFE)) {
            this.eucjp_lead = bite;
            return null;
        }
        // Step 8. Anything else is an error.
        return decoderError(this.fatal);
    };
    return EUCJPDecoder;
}());
/**
 * EUC-JP encoder. Stateless apart from the stored |fatal| option.
 * @constructor
 * @implements {Encoder}
 * @param {{fatal: boolean}} options
 */
var EUCJPEncoder = /** @class */ (function () {
    function EUCJPEncoder(options) {
        this.fatal = options.fatal;
    }
    /**
     * Encodes one code point (or end-of-stream).
     * @param {Stream} stream Input stream.
     * @param {number} code_point Next code point read from the stream.
     * @return {(number|!Array.<number>)} Byte(s) to emit.
     */
    EUCJPEncoder.prototype.handler = function (stream, code_point) {
        // 1. End-of-stream ends the encoding.
        if (code_point === end_of_stream) {
            return finished;
        }
        // 2. ASCII code points encode as themselves.
        if (isASCIICodePoint(code_point)) {
            return code_point;
        }
        // 3-4. Two code points have fixed single-byte encodings.
        switch (code_point) {
            case 0x00A5: return 0x5C;
            case 0x203E: return 0x7E;
        }
        // 5. U+FF61..U+FF9F encode as 0x8E plus a linear offset.
        if (inRange(code_point, 0xFF61, 0xFF9F)) {
            return [0x8E, code_point - 0xFF61 + 0xA1];
        }
        // 6. U+2212 is looked up (and reported on failure) as U+FF0D.
        var lookup = code_point === 0x2212 ? 0xFF0D : code_point;
        // 7-8. Find the jis0208 pointer; none means unencodable.
        var ptr = indexPointerFor(lookup, index('jis0208'));
        if (ptr === null) {
            return encoderError(lookup);
        }
        // 9-11. Split the pointer into a 94-wide row and column.
        var leadByte = Math.floor(ptr / 94) + 0xA1;
        var trailByte = ptr % 94 + 0xA1;
        return [leadByte, trailByte];
    };
    return EUCJPEncoder;
}());
/**
 * Streaming EUC-KR decoder. Holds at most one pending lead byte between
 * calls.
 * @constructor
 * @implements {Decoder}
 * @param {{fatal: boolean}} options
 */
var EUCKRDecoder = /** @class */ (function () {
    function EUCKRDecoder(options) {
        this.fatal = options.fatal;
        // euc-kr's decoder has an associated euc-kr lead (initially 0x00).
        /** @type {number} */ this.euckr_lead = 0x00;
    }
    /**
     * Consumes one byte (or end-of-stream) and reports what it produced.
     * @param {Stream} stream The stream of bytes being decoded.
     * @param {number} bite The next byte read from the stream.
     * @return {?(number|!Array.<number>)} The next code point(s)
     *     decoded, or null if not enough data exists in the input
     *     stream to decode a complete code point.
     */
    EUCKRDecoder.prototype.handler = function (stream, bite) {
        // 1. If byte is end-of-stream and euc-kr lead is not 0x00, set
        // euc-kr lead to 0x00 and return error.
        if (bite === end_of_stream && this.euckr_lead !== 0) {
            this.euckr_lead = 0x00;
            return decoderError(this.fatal);
        }
        // 2. If byte is end-of-stream and euc-kr lead is 0x00, return
        // finished.
        if (bite === end_of_stream && this.euckr_lead === 0)
            return finished;
        // 3. If euc-kr lead is not 0x00, let lead be euc-kr lead, let
        // pointer be null, set euc-kr lead to 0x00, and then run these
        // substeps:
        if (this.euckr_lead !== 0x00) {
            var lead = this.euckr_lead;
            var pointer = null;
            this.euckr_lead = 0x00;
            // 1. If byte is in the range 0x41 to 0xFE, inclusive, set
            // pointer to (lead − 0x81) × 190 + (byte − 0x41).
            if (inRange(bite, 0x41, 0xFE))
                pointer = (lead - 0x81) * 190 + (bite - 0x41);
            // 2. Let code point be null, if pointer is null, and the
            // index code point for pointer in index euc-kr otherwise.
            var code_point = (pointer === null)
                ? null : indexCodePointFor(pointer, index('euc-kr'));
            // 3. If code point is null and byte is an ASCII byte, prepend
            // byte to stream. The test must be on code point, not pointer:
            // an ASCII trail byte in 0x41-0x7F yields a non-null pointer,
            // and when that pointer has no index entry the byte still has
            // to be handed back so it is decoded on its own.
            if (code_point === null && isASCIIByte(bite))
                stream.prepend(bite);
            // 4. If code point is null, return error.
            if (code_point === null)
                return decoderError(this.fatal);
            // 5. Return a code point whose value is code point.
            return code_point;
        }
        // 4. If byte is an ASCII byte, return a code point whose value
        // is byte.
        if (isASCIIByte(bite))
            return bite;
        // 5. If byte is in the range 0x81 to 0xFE, inclusive, set
        // euc-kr lead to byte and return continue.
        if (inRange(bite, 0x81, 0xFE)) {
            this.euckr_lead = bite;
            return null;
        }
        // 6. Return error.
        return decoderError(this.fatal);
    };
    return EUCKRDecoder;
}());
/**
 * EUC-KR encoder. Stateless apart from the stored |fatal| option.
 * @constructor
 * @implements {Encoder}
 * @param {{fatal: boolean}} options
 */
var EUCKREncoder = /** @class */ (function () {
    function EUCKREncoder(options) {
        this.fatal = options.fatal;
    }
    /**
     * Encodes one code point (or end-of-stream).
     * @param {Stream} stream Input stream.
     * @param {number} code_point Next code point read from the stream.
     * @return {(number|!Array.<number>)} Byte(s) to emit.
     */
    EUCKREncoder.prototype.handler = function (stream, code_point) {
        // 1. End-of-stream ends the encoding.
        if (code_point === end_of_stream) {
            return finished;
        }
        // 2. ASCII code points encode as themselves.
        if (isASCIICodePoint(code_point)) {
            return code_point;
        }
        // 3-4. Find the euc-kr pointer; none means unencodable.
        var ptr = indexPointerFor(code_point, index('euc-kr'));
        if (ptr === null) {
            return encoderError(code_point);
        }
        // 5-7. Split the pointer into a 190-wide row and column.
        var row = Math.floor(ptr / 190);
        var column = ptr % 190;
        return [row + 0x81, column + 0x41];
    };
    return EUCKREncoder;
}());
/**
* @constructor
* @implements {Decoder}
* @param {{fatal: boolean}} options
*/
var GB18030Decoder = /** @class */ (function () {
function GB18030Decoder(options) {
this.fatal = options.fatal;
// gb18030's decoder has an associated gb18030 first, gb18030
// second, and gb18030 third (all initially 0x00).
/** @type {number} */ this.gb18030_first = 0x00,
/** @type {number} */ this.gb18030_second = 0x00,
/** @type {number} */ this.gb18030_third = 0x00;
}
/**
* @param {Stream} stream The stream of bytes being decoded.
* @param {number} bite The next byte read from the stream.
* @return {?(number|!Array.<number>)} The next code point(s)
* decoded, or null if not enough data exists in the input
* stream to decode a complete code point.
*/
GB18030Decoder.prototype.handler = function (stream, bite) {
// 1. If byte is end-of-stream and gb18030 first, gb18030
// second, and gb18030 third are 0x00, return finished.
if (bite === end_of_stream && this.gb18030_first === 0x00 &&
this.gb18030_second === 0x00 && this.gb18030_third === 0x00) {
return finished;
}
// 2. If byte is end-of-stream, and gb18030 first, gb18030
// second, or gb18030 third is not 0x00, set gb18030 first,
// gb18030 second, and gb18030 third to 0x00, and return error.
if (bite === end_of_stream &&
(this.gb18030_first !== 0x00 || this.gb18030_second !== 0x00 ||
this.gb18030_third !== 0x00)) {
this.gb18030_first = 0x00;
this.gb18030_second = 0x00;
this.gb18030_third = 0x00;
decoderError(this.fatal);
}
var code_point;
// 3. If gb18030 third is not 0x00, run these substeps:
if (this.gb18030_third !== 0x00) {
// 1. Let code point be null.
code_point = null;
// 2. If byte is in the range 0x30 to 0x39, inclusive, set
// code point to the index gb18030 ranges code point for
// (((gb18030 first − 0x81) × 10 + gb18030 second − 0x30) ×
// 126 + gb18030 third − 0x81) × 10 + byte − 0x30.
if (inRange(bite, 0x30, 0x39)) {
code_point = indexGB18030RangesCodePointFor((((this.gb18030_first - 0x81) * 10 + this.gb18030_second - 0x30) * 126 +
this.gb18030_third - 0x81) * 10 + bite - 0x30);
}
// 3. Let buffer be a byte sequence consisting of gb18030
// second, gb18030 third, and byte, in order.
var buffer = [this.gb18030_second, this.gb18030_third, bite];
// 4. Set gb18030 first, gb18030 second, and gb18030 third to
// 0x00.
this.gb18030_first = 0x00;
this.gb18030_second = 0x00;
this.gb18030_third = 0x00;
// 5. If code point is null, prepend buffer to stream and
// return error.
if (code_point === null) {
stream.prepend(buffer);
return decoderError(this.fatal);
}
// 6. Return a code point whose value is code point.
return code_point;
}
// 4. If gb18030 second is not 0x00, run these substeps:
if (this.gb18030_second !== 0x00) {
// 1. If byte is in the range 0x81 to 0xFE, inclusive, set
// gb18030 third to byte and return continue.
if (inRange(bite, 0x81, 0xFE)) {
this.gb18030_third = bite;
return null;
}
// 2. Prepend gb18030 second followed by byte to stream, set
// gb18030 first and gb18030 second to 0x00, and return error.
stream.prepend([this.gb18030_second, bite]);
this.gb18030_first = 0x00;
this.gb18030_second = 0x00;
return decoderError(this.fatal);
}
// 5. If gb18030 first is not 0x00, run these substeps:
if (this.gb18030_first !== 0x00) {
// 1. If byte is in the range 0x30 to 0x39, inclusive, set
// gb18030 second to byte and return continue.
if (inRange(bite, 0x30, 0x39)) {
this.gb18030_second = bite;
return null;
}
// 2. Let lead be gb18030 first, let pointer be null, and set
// gb18030 first to 0x00.
var lead = this.gb18030_first;
var pointer = null;
this.gb18030_first = 0x00;
// 3. Let offset be 0x40 if byte is less than 0x7F and 0x41
// otherwise.
var offset = bite < 0x7F ? 0x40 : 0x41;
// 4. If byte is in the range 0x40 to 0x7E, inclusive, or 0x80
// to 0xFE, inclusive, set pointer to (lead − 0x81) × 190 +
// (byte − offset).
if (inRange(bite, 0x40, 0x7E) || inRange(bite, 0x80, 0xFE))
pointer = (lead - 0x81) * 190 + (bite - offset);
// 5. Let code point be null if pointer is null and the index
// code point for pointer in index gb18030 otherwise.
code_point = pointer === null ? null :
indexCodePointFor(pointer, index('gb18030'));
// 6. If code point is null and byte is an ASCII byte, prepend
// byte to stream.
if (code_point === null && isASCIIByte(bite))
stream.prepend(bite);
// 7. If code point is null, return error.
if (code_point === null)
return decoderError(this.fatal);
// 8. Return a code point whose value is code point.
return code_point;
}
// 6. I