UNPKG

jtc-utils

Version:
187 lines (186 loc) 7.24 kB
import { PackedMap } from "../util/PackedMap.mjs"; import { IBMKanjiDecodeMap } from "./IBMKanjiDecodeMap.mjs"; import { IBMKanjiEncodeMap } from "./IBMKanjiEncodeMap.mjs"; class Cp939Charset { get name() { return "cp939"; } createDecoder(options) { return new Cp939Decoder(options); } createEncoder(options) { return new Cp939Encoder(options); } isUnicode() { return false; } isEbcdic() { return true; } } const EbcdicDecodeMap = Uint16Array.of( // 0 1 2 3 4 5 6 7 8 9 A B C D E F 0x0000, 0x0001, 0x0002, 0x0003, 0x009c, 0x0009, 0x0086, 0x007f, 0x0097, 0x008d, 0x008e, 0x000b, 0x000c, 0x000d, 0xfffd, 0xfffd, // 0x 0x0010, 0x0011, 0x0012, 0x0013, 0x009d, 0x000a, 0x0008, 0x0087, 0x0018, 0x0019, 0x0092, 0x008f, 0x001c, 0x001d, 0x001e, 0x001f, // 1x 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x000a, 0x0017, 0x001b, 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x0005, 0x0006, 0x0007, // 2x 0x0090, 0x0091, 0x0016, 0x0093, 0x0094, 0x0095, 0x0096, 0x0004, 0x0098, 0x0099, 0x009a, 0x009b, 0x0014, 0x0015, 0x009e, 0x001a, // 3x 0x0020, 0xfffd, 0xff61, 0xff62, 0xff63, 0xff64, 0xff65, 0xff66, 0xff67, 0xff68, 0x00a2, 0x002e, 0x003c, 0x0028, 0x002b, 0x007c, // 4x 0x0026, 0xff69, 0xff6a, 0xff6b, 0xff6c, 0xff6d, 0xff6e, 0xff6f, 0xff70, 0xff71, 0x0021, 0x0024, 0x002a, 0x0029, 0x003b, 0x00ac, // 5x 0x002d, 0x002f, 0xff72, 0xff73, 0xff74, 0xff75, 0xff76, 0xff77, 0xff78, 0xff79, 0xfffd, 0x002c, 0x0025, 0x005f, 0x003e, 0x003f, // 6x 0xff7a, 0xff7b, 0xff7c, 0xff7d, 0xff7e, 0xff7f, 0xff80, 0xff81, 0xff82, 0x0060, 0x003a, 0x0023, 0x0040, 0x0027, 0x003d, 0x0022, // 7x 0xfffd, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0xff83, 0xff84, 0xff85, 0xff86, 0xff87, 0xff88, // 8x 0xfffd, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, 0x0070, 0x0071, 0x0072, 0xff89, 0xff8a, 0xff8b, 0xff8c, 0xff8d, 0xff8e, // 9x 0x203e, 0x007e, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0xff8f, 0xff90, 0xff91, 0x005b, 0xff92, 0xff93, // Ax 0x005e, 0x00a3, 0x00a5, 0xff94, 0xff95, 0xff96, 0xff97, 0xff98, 0xff99, 0xff9a, 0xff9b, 0xff9c, 0xff9d, 0x005d, 0xff9e, 0xff9f, // Bx 0x007b, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, // Cx 0x007d, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, 0x0050, 0x0051, 0x0052, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, // Dx 0x005c, 0xfffd, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, // Ex 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x009f); class Cp939Decoder { fatal; state; constructor(options) { this.fatal = options?.fatal ?? true; IBMKanjiDecodeMap.initialize(); } decode(input, options) { let shift = options?.shift ?? false; const array = new Array(); if (options?.stream && this.state) { shift = this.state.shift; array.push(this.state.buf); this.state = undefined; } for (let i = 0; i < input.length; i++) { let fail = false; const n = input[i]; if (n === 0x0e) { shift = true; } else if (n === 0x0f) { shift = false; } else if (shift) { if (i + 1 < input.length) { i++; const enc = IBMKanjiDecodeMap.get((n << 8) | input[i]); if (enc != null) { array.push(enc); } else { fail = true; } } else if (options?.stream) { this.state = { shift, buf: n }; break; } else { fail = true; } } else { const c = EbcdicDecodeMap[n]; if (c !== 0xfffd) { array.push(c); } else { fail = true; } } if (fail) { if (this.fatal) { throw TypeError(`The input ${n.toString(16)} could not be encoded.`); } else { array.push(0xfffd); } } } return String.fromCharCode(...array); } } const EbcdicEncodeMap = new PackedMap((m) => { const decoder = new Cp939Decoder({ fatal: false }); m.set(0x85, 0x15); const buf = new Uint8Array(1); for (let b = 0x00; b <= 0xff; b++) { buf[0] = b; const decoded = decoder.decode(buf); if (decoded && decoded !== "\uFFFD") { m.set(decoded.charCodeAt(0), b); } } }); class Cp939Encoder { fatal; constructor(options) { this.fatal = options?.fatal ?? true; EbcdicEncodeMap.initialize(); IBMKanjiEncodeMap.initialize(); } canEncode(str) { for (let i = 0; i < str.length; i++) { const cp = str.charCodeAt(i); if (EbcdicEncodeMap.get(cp) != null) { // EBCDIC // no handle } else if (IBMKanjiEncodeMap.get(cp) != null) { // no handle } else { return false; } } return true; } encode(str, options) { const out = new Array(); const limit = options?.limit ?? Number.POSITIVE_INFINITY; let shift = options?.shift ?? false; let prev = 0; for (let i = 0; i < str.length; i++) { prev = out.length; const cp = str.charCodeAt(i); let enc; if ((enc = EbcdicEncodeMap.get(cp)) != null) { // EBCDIC if (shift) { out.push(0x0f); shift = false; } out.push(enc); } else if ((enc = IBMKanjiEncodeMap.get(cp)) != null) { if (!shift) { out.push(0x0e); shift = true; } out.push((enc >>> 8) & 0xff); out.push(enc & 0xff); } else { if (this.fatal) { throw TypeError(`The code point ${cp.toString(16)} could not be encoded`); } else if (shift) { out.push(0x42); // ? out.push(0x6f); } else { out.push(0x6f); // ? } } if (out.length > limit) { out.length = prev; break; } } if (shift && options?.shift !== true && out.length < limit) { out.push(0x0f); } return Uint8Array.from(out); } } export const cp939 = new Cp939Charset();