UNPKG

hanja

Version:

Sino-Korean (a.k.a. Hanja) utilities for Korean language processing

90 lines (89 loc) 3.21 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.split = exports.translate = void 0; const hanjaTable = require("./data/hanjaeum.json"); function isHanja(char) { return char in hanjaTable; } function split(text) { let prev = null; let segment = null; const result = []; for (const char of text) { if (prev === isHanja(char)) { if (segment !== null) segment += char; continue; } if (segment !== null) result.push(segment); prev = !prev; segment = ""; segment += char; } if (segment !== null) result.push(segment); return result; } exports.split = split; function dueum(char, context) { var _a; const chars = char.normalize('NFKD'); // 모음이나 ㄴ 받침 뒤에 이어지는 '렬, 률'은 '열, 율'로 발음한다. if ('렬률'.includes(char)) { const prevChars = (_a = context[context.length - 1]) === null || _a === void 0 ? void 0 : _a.normalize('NFKD'); if (prevChars === undefined || prevChars[2] === undefined || prevChars[2] === '안'.normalize('NFKD')[2]) { return ('ㅇ' + chars.slice(1)).normalize('NFKC'); } } if (context !== '') return char; // 한자음 '녀, 뇨, 뉴, 니', '랴, 려, 례, 료, 류, 리'가 단어 첫머리에 올 때 // '여, 요, 유, 이', '야, 여, 예, 요, 유, 이'로 발음한다. if (chars[0] === 'ㄴ'.normalize('NFKD') && 'ㅑㅕㅛㅠㅣㅖ'.normalize('NFKD').includes(chars[1])) { return ('ㅇ' + chars.slice(1)).normalize('NFKC'); } if (chars[0] === 'ㄹ'.normalize('NFKD') && 'ㅑㅕㅛㅠㅣㅖ'.normalize('NFKD').includes(chars[1])) { return ('ㅇ' + chars.slice(1)).normalize('NFKC'); } // 한자음 '라, 래, 로, 뢰, 루, 르'가 단어 첫머리에 올 때 '나, 내, 노, 뇌, // 누, 느'로 발음한다. 
if (chars[0] === 'ㄹ'.normalize('NFKD') && 'ㅏㅗㅜㅡㅐㅚ'.normalize('NFKD').includes(chars[1])) { return ('ㄴ' + chars.slice(1)).normalize('NFKC'); } return char; } function translate(text, mode) { if (mode === 'SUBSTITUTION') { let result = ""; for (const char of text) { if (char in hanjaTable) result += dueum(hanjaTable[char], result); else result += char; } return result; } else if (typeof mode === 'function') { const fn = mode || function (hanja) { return hanja; }; let result = ""; const segments = split(text); for (const segment of segments) { if (!isHanja(segment[0])) { result += segment; } else { result += fn(segment, translate(segment, 'SUBSTITUTION')); } } return result; } else if (mode === 'PARENTHESIS_HANGUL') { return translate(text, (hanja, hangul) => `${hanja}(${hangul})`); } else if (mode === 'PARENTHESIS_HANJA') { return translate(text, (hanja, hangul) => `${hangul}(${hanja})`); } return text; } exports.translate = translate;