UNPKG

@kayahr/text-encoding

Version:
113 lines 3.63 kB
/*
 * Copyright (C) 2021 Klaus Reimer <k@ailis.de>
 * See LICENSE.md for licensing information.
 */
import gb18030 from "../../../data/gb18030.cp.js";
import gb18030ranges from "../../../data/gb18030.ranges.js";
import { AbstractDecoder } from "../AbstractDecoder.js";
import { END_OF_BUFFER } from "../ByteBuffer.js";
import { FINISHED } from "../constants.js";
import { inRange, isASCII } from "../util.js";

/**
 * Maps an index of the gb18030 ranges table to a code point.
 *
 * Indices strictly between 39419 and 189000, and indices above 1237575, have no mapping. Index 7457 is
 * special-cased to U+E7C7. Every other index is resolved relative to the last table entry whose start index
 * does not exceed it.
 *
 * @param index - The index in the gb18030 code point table.
 * @returns The code point corresponding to the given index, or null if the index has no mapping.
 */
function getCodePoint(index) {
    const inUnmappedGap = index > 39419 && index < 189000;
    if (inUnmappedGap || index > 1237575) {
        return null;
    }
    if (index === 7457) {
        return 0xE7C7;
    }

    // Entries are read as [startIndex, startCodePoint] pairs. The scan stops at the first entry starting
    // beyond the requested index (this presumes the table is sorted by start index - confirm against data).
    let rangeStart = 0;
    let rangeCodePoint = 0;
    for (const range of gb18030ranges) {
        const start = range[0];
        if (start > index) {
            break;
        }
        rangeStart = start;
        rangeCodePoint = range[1];
    }
    return rangeCodePoint + index - rangeStart;
}

/**
 * Combines the four bytes of a four-byte sequence into the index passed to {@link getCodePoint}.
 *
 * @param first  - The first byte of the sequence.
 * @param second - The second byte of the sequence.
 * @param third  - The third byte of the sequence.
 * @param fourth - The fourth byte of the sequence.
 * @returns The computed index.
 */
function getFourByteIndex(first, second, third, fourth) {
    return (((first - 0x81) * 10 + second - 0x30) * 126 + third - 0x81) * 10 + fourth - 0x30;
}

/**
 * Decoder for gb18030 and gbk encoding.
 *
 * The fields `first`, `second` and `third` hold the already consumed bytes of a not yet completed multi-byte
 * sequence. A value of 0x00 means that the corresponding byte has not been read yet.
 */
export class GBDecoder extends AbstractDecoder {
    first = 0x00;
    second = 0x00;
    third = 0x00;

    /**
     * Reads one byte from the buffer and advances the decoder state.
     *
     * @param buffer - The byte buffer to read from. Bytes which could not be consumed are handed back to it
     *                 through its `write` method.
     * @returns FINISHED when the buffer is exhausted and no sequence is pending, null when more bytes are
     *          needed to complete the current sequence, the decoded code point when a sequence is complete,
     *          or the result of `this.fail()` when the input is invalid.
     * @inheritdoc
     */
    decode(buffer) {
        const byte = buffer.read();

        if (byte === END_OF_BUFFER) {
            const nothingPending = this.first === 0x00 && this.second === 0x00 && this.third === 0x00;
            if (nothingPending) {
                return FINISHED;
            }
            // Input ended in the middle of a multi-byte sequence.
            this.first = 0x00;
            this.second = 0x00;
            this.third = 0x00;
            return this.fail();
        }

        // Fourth byte of a four-byte sequence.
        if (this.third !== 0x00) {
            const { first, second, third } = this;
            const codePoint = inRange(byte, 0x30, 0x39)
                ? getCodePoint(getFourByteIndex(first, second, third, byte))
                : null;
            this.first = 0x00;
            this.second = 0x00;
            this.third = 0x00;
            if (codePoint == null) {
                // Only the lead byte is dropped, the remaining three bytes are handed back to the buffer.
                buffer.write(second, third, byte);
                return this.fail();
            }
            return codePoint;
        }

        // Third byte of a four-byte sequence.
        if (this.second !== 0x00) {
            if (inRange(byte, 0x81, 0xFE)) {
                this.third = byte;
                return null;
            }
            buffer.write(this.second, byte);
            this.first = 0x00;
            this.second = 0x00;
            return this.fail();
        }

        // Second byte: either continues a four-byte sequence or completes a two-byte sequence.
        if (this.first !== 0x00) {
            if (inRange(byte, 0x30, 0x39)) {
                this.second = byte;
                return null;
            }
            const lead = this.first;
            this.first = 0x00;

            let codePoint = null;
            if (inRange(byte, 0x40, 0x7E) || inRange(byte, 0x80, 0xFE)) {
                // 0x7F is not a valid trail byte, so trail bytes above it are shifted by one more.
                const trailOffset = byte < 0x7F ? 0x40 : 0x41;
                const index = (lead - 0x81) * 190 + (byte - trailOffset);
                codePoint = gb18030[index] ?? null;
            }
            if (codePoint != null) {
                return codePoint;
            }
            if (isASCII(byte)) {
                // Hand an ASCII byte back to the buffer instead of swallowing it with the broken sequence.
                buffer.write(byte);
            }
            return this.fail();
        }

        // No sequence pending: single byte, start of a multi-byte sequence, or invalid byte.
        if (isASCII(byte)) {
            return byte;
        }
        if (byte === 0x80) {
            return 0x20AC;
        }
        if (inRange(byte, 0x81, 0xFE)) {
            this.first = byte;
            return null;
        }
        return this.fail();
    }
}
//# sourceMappingURL=GBDecoder.js.map