jtc-utils
Version:
Utilities for Japanese Traditional Companies
147 lines (133 loc) • 3.63 kB
text/typescript
import { PackedMap } from "../util/PackedMap.cjs";
import { JISEncodeMap } from "./JISEncodeMap.cjs";
import {
type Charset,
type CharsetDecoderOptions,
type CharsetEncodeOptions,
type CharsetEncoder,
type CharsetEncoderOptions,
StandardDecoder,
} from "./charset.cjs";
class EucjpCharset implements Charset {
get name() {
return "euc-jp";
}
createDecoder(options?: CharsetDecoderOptions) {
return new StandardDecoder("euc-jp");
}
createEncoder(options?: CharsetEncoderOptions) {
return new EucjpEncoder(options);
}
isUnicode() {
return false;
}
isEbcdic() {
return false;
}
}
const EucjpEncodeMap = new PackedMap((m) => {
const decoder = new TextDecoder("euc-jp");
// EUC-JP additional mapping
m.set(0xa5, 0x5c);
m.set(0x203e, 0x7e);
m.set(0x2170, 0x8ff3f3);
m.set(0x2171, 0x8ff3f4);
m.set(0x2172, 0x8ff3f5);
m.set(0x2173, 0x8ff3f6);
m.set(0x2174, 0x8ff3f7);
m.set(0x2175, 0x8ff3f8);
m.set(0x2176, 0x8ff3f9);
m.set(0x2177, 0x8ff3fa);
m.set(0x2178, 0x8ff3fb);
m.set(0x2179, 0x8ff3fc);
m.set(0x2212, 0xa1dd);
m.set(0x2225, 0xa1c2);
m.set(0x301c, 0xa1c1);
const buf = new Uint8Array(3);
buf[0] = 0x8f;
for (const hba of [
[0xa1, 0xab],
[0xb0, 0xed],
[0xf3, 0xf4],
]) {
for (let hb = hba[0]; hb <= hba[1]; hb++) {
buf[1] = hb;
for (let lb = 0xa1; lb <= 0xfe; lb++) {
buf[2] = lb;
const decoded = decoder.decode(buf);
if (decoded !== "\uFFFD") {
m.set(decoded.charCodeAt(0), (0x8f << 16) | (hb << 8) | lb);
}
}
}
}
});
class EucjpEncoder implements CharsetEncoder {
private fatal;
constructor(options?: CharsetEncoderOptions) {
this.fatal = options?.fatal ?? true;
JISEncodeMap.initialize();
EucjpEncodeMap.initialize();
}
canEncode(str: string, options?: CharsetEncodeOptions) {
for (let i = 0; i < str.length; i++) {
const cp = str.charCodeAt(i);
if (cp <= 0x7f) {
// ASCII
// no handle
} else if (cp >= 0xff61 && cp <= 0xff9f) {
// 半角カナ
// no handle
} else {
let jis = JISEncodeMap.get(cp);
if (jis != null) {
// no handle
} else if ((jis = EucjpEncodeMap.get(cp)) != null) {
// no handle
} else {
return false;
}
}
}
return true;
}
encode(str: string, options?: CharsetEncodeOptions): Uint8Array {
const out = new Array<number>();
for (let i = 0; i < str.length; i++) {
const cp = str.charCodeAt(i);
if (cp <= 0x7f) {
// ASCII
out.push(cp);
} else if (cp >= 0xff61 && cp <= 0xff9f) {
// 半角カナ
out.push(0x8e);
out.push(cp - 0xff61 + 0xa1);
} else {
let jis = JISEncodeMap.get(cp);
if (jis != null) {
out.push(((jis >>> 8) + 0x80) & 0xff);
out.push((jis + 0x80) & 0xff);
} else if ((jis = EucjpEncodeMap.get(cp)) != null) {
if (jis > 0xffff) {
out.push((jis >>> 16) & 0xff);
out.push((jis >>> 8) & 0xff);
out.push(jis & 0xff);
} else if (jis > 0xff) {
out.push((jis >>> 8) & 0xff);
out.push(jis & 0xff);
} else {
out.push(jis & 0xff);
}
} else if (this.fatal) {
throw TypeError(
`The code point ${cp.toString(16)} could not be encoded`,
);
} else {
out.push(0x5f); // ?
}
}
}
return Uint8Array.from(out);
}
}
export const eucjp = new EucjpCharset();