stringencoding
Version:
Encode to/from Typed Array buffers
151 lines (142 loc) • 4.5 kB
JavaScript
// 11.2 iso-2022-jp
/**
* @constructor
* @param {{fatal: boolean}} options
*/
function ISO2022JPDecoder(options) {
var fatal = options.fatal;
/** @enum */
var state = {
ASCII: 0,
escape_start: 1,
escape_middle: 2,
escape_final: 3,
lead: 4,
trail: 5,
Katakana: 6
};
var /** @type {number} */ iso2022jp_state = state.ASCII,
/** @type {boolean} */ iso2022jp_jis0212 = false,
/** @type {number} */ iso2022jp_lead = 0x00;
/**
* @param {ByteInputStream} byte_pointer The byte stream to decode.
* @return {?number} The next code point decoded, or null if not enough
* data exists in the input stream to decode a complete code point.
*/
this.decode = function(byte_pointer) {
var bite = byte_pointer.get();
if (bite !== EOF_byte) {
byte_pointer.offset(1);
}
switch (iso2022jp_state) {
default:
case state.ASCII:
if (bite === 0x1B) {
iso2022jp_state = state.escape_start;
return null;
}
if (inRange(bite, 0x00, 0x7F)) {
return bite;
}
if (bite === EOF_byte) {
return EOF_code_point;
}
return decoderError(fatal);
case state.escape_start:
if (bite === 0x24 || bite === 0x28) {
iso2022jp_lead = bite;
iso2022jp_state = state.escape_middle;
return null;
}
if (bite !== EOF_byte) {
byte_pointer.offset(-1);
}
iso2022jp_state = state.ASCII;
return decoderError(fatal);
case state.escape_middle:
var lead = iso2022jp_lead;
iso2022jp_lead = 0x00;
if (lead === 0x24 && (bite === 0x40 || bite === 0x42)) {
iso2022jp_jis0212 = false;
iso2022jp_state = state.lead;
return null;
}
if (lead === 0x24 && bite === 0x28) {
iso2022jp_state = state.escape_final;
return null;
}
if (lead === 0x28 && (bite === 0x42 || bite === 0x4A)) {
iso2022jp_state = state.ASCII;
return null;
}
if (lead === 0x28 && bite === 0x49) {
iso2022jp_state = state.Katakana;
return null;
}
if (bite === EOF_byte) {
byte_pointer.offset(-1);
} else {
byte_pointer.offset(-2);
}
iso2022jp_state = state.ASCII;
return decoderError(fatal);
case state.escape_final:
if (bite === 0x44) {
iso2022jp_jis0212 = true;
iso2022jp_state = state.lead;
return null;
}
if (bite === EOF_byte) {
byte_pointer.offset(-2);
} else {
byte_pointer.offset(-3);
}
iso2022jp_state = state.ASCII;
return decoderError(fatal);
case state.lead:
if (bite === 0x0A) {
iso2022jp_state = state.ASCII;
return decoderError(fatal, 0x000A);
}
if (bite === 0x1B) {
iso2022jp_state = state.escape_start;
return null;
}
if (bite === EOF_byte) {
return EOF_code_point;
}
iso2022jp_lead = bite;
iso2022jp_state = state.trail;
return null;
case state.trail:
iso2022jp_state = state.lead;
if (bite === EOF_byte) {
return decoderError(fatal);
}
var code_point = null;
var pointer = (iso2022jp_lead - 0x21) * 94 + bite - 0x21;
if (inRange(iso2022jp_lead, 0x21, 0x7E) &&
inRange(bite, 0x21, 0x7E)) {
code_point = (iso2022jp_jis0212 === false) ?
indexCodePointFor(pointer, indexes['jis0208']) :
indexCodePointFor(pointer, indexes['jis0212']);
}
if (code_point === null) {
return decoderError(fatal);
}
return code_point;
case state.Katakana:
if (bite === 0x1B) {
iso2022jp_state = state.escape_start;
return null;
}
if (inRange(bite, 0x21, 0x5F)) {
return 0xFF61 + bite - 0x21;
}
if (bite === EOF_byte) {
return EOF_code_point;
}
return decoderError(fatal);
}
};
}