// hanja
// Version:
// Sino-Korean, aka Hanja, Utilities for Korean Language Processing
// 90 lines (89 loc) • 3.21 kB
// JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.split = exports.translate = void 0;
const hanjaTable = require("./data/hanjaeum.json");
/**
 * Tells whether `char` has an entry in the hanja reading table.
 *
 * Only the table's own keys count: the `in` operator would also report
 * inherited names such as "constructor" or "toString" as hanja.
 *
 * @param {string} char - The candidate character (used as a table key).
 * @returns {boolean} `true` when `hanjaTable` has its own entry for `char`.
 */
function isHanja(char) {
  return Object.prototype.hasOwnProperty.call(hanjaTable, char);
}
/**
 * Splits `text` into consecutive runs that are either entirely hanja or
 * entirely non-hanja, preserving order. Concatenating the returned segments
 * yields the original text.
 *
 * The text is walked by code point, so a character outside the BMP is never
 * cut in half.
 *
 * @param {string} text - The text to segment.
 * @returns {string[]} The runs, in order; an empty array for empty text.
 */
function split(text) {
  const result = [];
  let segment = "";
  // Class of the run being built; meaningless while `segment` is empty.
  let segmentIsHanja = false;
  for (const char of text) {
    const charIsHanja = isHanja(char);
    // Close the current run when the character class flips. The class is
    // taken from the character itself, so the very first character is
    // classified correctly instead of being assumed to be hanja.
    if (segment !== "" && charIsHanja !== segmentIsHanja) {
      result.push(segment);
      segment = "";
    }
    segmentIsHanja = charIsHanja;
    segment += char;
  }
  if (segment !== "") {
    result.push(segment);
  }
  return result;
}
exports.split = split;
/**
 * Applies the Korean initial-sound rule to one Sino-Korean reading.
 *
 * The reading is decomposed into conjoining jamo (NFKD), its initial
 * consonant is swapped where a rule applies, and the result is recomposed
 * (NFKC) into a single syllable.
 *
 * @param {string} char - The Hangul reading of one hanja (one syllable).
 * @param {string} context - The text already produced before `char`; used to
 *   inspect the preceding character and to detect the start of a word.
 * @returns {string} The adjusted reading, or `char` unchanged when no rule
 *   applies.
 */
function dueum(char, context) {
  const jamo = char.normalize('NFKD');
  const initial = jamo[0];
  const medial = jamo[1];
  // Rebuilds the syllable with a different initial consonant.
  const withInitial = (consonant) => (consonant + jamo.slice(1)).normalize('NFKC');
  const hasMedialIn = (vowels) => medial !== undefined && vowels.normalize('NFKD').includes(medial);

  // '렬' and '률' following a vowel or a final ㄴ are pronounced '열' and '율'.
  // A missing previous character, or one without a third jamo, counts as
  // "no final consonant".
  if (char === '렬' || char === '률') {
    const previous = context[context.length - 1];
    const previousFinal = previous == null ? undefined : previous.normalize('NFKD')[2];
    if (previousFinal === undefined || previousFinal === '안'.normalize('NFKD')[2]) {
      return withInitial('ㅇ');
    }
  }

  // The remaining rules apply only at the beginning of a word: either nothing
  // has been produced yet, or the preceding character is whitespace. Digits
  // and letters do not start a new word here, so counters such as "2023년"
  // keep their reading.
  const atWordStart = context === '' || /\s$/.test(context);
  if (!atWordStart) {
    return char;
  }

  // Word-initial '녀, 뇨, 뉴, 니' and '랴, 려, 례, 료, 류, 리' are pronounced
  // '여, 요, 유, 이' and '야, 여, 예, 요, 유, 이'.
  const isNieun = initial === 'ㄴ'.normalize('NFKD');
  const isRieul = initial === 'ㄹ'.normalize('NFKD');
  if ((isNieun || isRieul) && hasMedialIn('ㅑㅕㅛㅠㅣㅖ')) {
    return withInitial('ㅇ');
  }

  // Word-initial '라, 래, 로, 뢰, 루, 르' are pronounced '나, 내, 노, 뇌, 누, 느'.
  if (isRieul && hasMedialIn('ㅏㅗㅜㅡㅐㅚ')) {
    return withInitial('ㄴ');
  }
  return char;
}
/**
 * Converts the hanja in `text` to Hangul according to `mode`.
 *
 * @param {string} text - The text to convert.
 * @param {'SUBSTITUTION'|'PARENTHESIS_HANGUL'|'PARENTHESIS_HANJA'|Function} mode
 *   - `'SUBSTITUTION'`: replace every hanja with its reading.
 *   - `'PARENTHESIS_HANGUL'`: keep each hanja run and append `(reading)`.
 *   - `'PARENTHESIS_HANJA'`: emit the reading and append `(hanja)`.
 *   - a function `(hanja, hangul) => string`: called once per hanja run with
 *     the run and its substituted reading; its return value is emitted.
 * @returns {string} The converted text; `text` unchanged for any other mode.
 */
function translate(text, mode) {
  if (mode === 'SUBSTITUTION') {
    let result = "";
    for (const char of text) {
      // The output so far is passed as context so the initial-sound rule can
      // look at what precedes this reading.
      result += isHanja(char) ? dueum(hanjaTable[char], result) : char;
    }
    return result;
  }
  if (mode === 'PARENTHESIS_HANGUL') {
    return translate(text, (hanja, hangul) => `${hanja}(${hangul})`);
  }
  if (mode === 'PARENTHESIS_HANJA') {
    return translate(text, (hanja, hangul) => `${hangul}(${hanja})`);
  }
  if (typeof mode !== 'function') {
    return text;
  }

  let result = "";
  for (const segment of split(text)) {
    // Take the first code point, not the first UTF-16 unit: a hanja outside
    // the BMP would otherwise be seen as a lone surrogate and never match.
    const [firstChar] = segment;
    result += isHanja(firstChar)
      ? mode(segment, translate(segment, 'SUBSTITUTION'))
      : segment;
  }
  return result;
}
exports.translate = translate;