UNPKG

hanja

Version:

Sino-Korean, aka Hanja, Utilities for Korean Language Processing

91 lines (90 loc) 3.11 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.split = exports.translate = void 0;
const hanjaTable = require("./data/hanjaeum.json");

// NFKD turns a precomposed Hangul syllable into conjoining jamo
// (initial, vowel, optional final), and turns the compatibility jamo written
// in these literals into the matching conjoining forms. The values are
// loop-invariant, so they are computed once instead of on every call.
const INITIAL_NIEUN = 'ㄴ'.normalize('NFKD');
const INITIAL_RIEUL = 'ㄹ'.normalize('NFKD');
const FINAL_NIEUN = '안'.normalize('NFKD')[2];
// Vowels before which a word-initial ㄴ or ㄹ is replaced by ㅇ.
const VOWELS_DROPPING_INITIAL = 'ㅑㅕㅛㅠㅣㅖ'.normalize('NFKD');
// Vowels before which a word-initial ㄹ is replaced by ㄴ.
const VOWELS_RIEUL_TO_NIEUN = 'ㅏㅗㅜㅡㅐㅚ'.normalize('NFKD');
// Matches when a string ends with a letter, combining mark or digit.
const ENDS_WITH_WORD_CHAR = /[\p{L}\p{M}\p{N}]$/u;

/**
 * Tells whether `char` has an entry in the Hanja reading table.
 *
 * Only own properties count, so names inherited from Object.prototype can
 * never be mistaken for table entries.
 *
 * @param {string} char - A single character (one code point).
 * @returns {boolean} True when the table has a reading for `char`.
 */
function isHanja(char) {
    return Object.prototype.hasOwnProperty.call(hanjaTable, char);
}

/**
 * Splits `text` into maximal runs of consecutive Hanja or consecutive
 * non-Hanja characters, preserving order.
 *
 * @param {string} text - Text to split; iterated by code point.
 * @returns {string[]} The non-empty runs; an empty array for empty input.
 */
function split(text) {
    const result = [];
    let segment = '';
    let prevIsHanja = null;
    for (const char of text) {
        const currentIsHanja = isHanja(char);
        if (currentIsHanja === prevIsHanja) {
            segment += char;
            continue;
        }
        // The character class changed: close the current run, start a new one.
        if (segment) {
            result.push(segment);
        }
        segment = char;
        prevIsHanja = currentIsHanja;
    }
    if (segment) {
        result.push(segment);
    }
    return result;
}
exports.split = split;

/**
 * Rebuilds a syllable from its decomposed jamo with a different initial.
 *
 * @param {string} initial - Replacement initial consonant (compatibility jamo).
 * @param {string} jamo - NFKD-decomposed syllable whose first unit is dropped.
 * @returns {string} The NFKC-recomposed result.
 */
function withInitial(initial, jamo) {
    return (initial + jamo.slice(1)).normalize('NFKC');
}

/**
 * Tells whether the next character appended to `context` starts a word.
 *
 * That is the case when nothing has been produced yet, or when the last
 * produced character is not a letter, combining mark or digit (for example
 * whitespace or punctuation).
 *
 * @param {string} context - Output produced so far.
 * @returns {boolean} True at a word start.
 */
function isWordStart(context) {
    // Only the tail is inspected (two code units cover a surrogate pair), so
    // the check stays constant-time however long the output has grown.
    return !ENDS_WITH_WORD_CHAR.test(context.slice(-2));
}

/**
 * Applies the initial sound rule (dueum) to the Hangul reading of one Hanja.
 *
 * @param {string} char - Hangul reading taken from the Hanja table.
 * @param {string} context - Output produced so far; decides whether `char`
 *   is word-initial and what precedes it.
 * @returns {string} The reading, adjusted when a rule applies.
 */
function dueum(char, context) {
    const jamo = char.normalize('NFKD');
    // '렬' / '률' become '열' / '율' after a vowel or a final ㄴ. A missing
    // previous character, or one without a third decomposed unit, is treated
    // like a vowel ending.
    if (char === '렬' || char === '률') {
        const prevJamo = context.length > 0
            ? context[context.length - 1].normalize('NFKD')
            : undefined;
        if (!prevJamo || prevJamo[2] === undefined || prevJamo[2] === FINAL_NIEUN) {
            return withInitial('ㅇ', jamo);
        }
    }
    // The remaining rules apply only at the start of a word.
    if (!isWordStart(context)) {
        return char;
    }
    const initial = jamo[0];
    const vowel = jamo[1];
    // Word-initial '녀, 뇨, 뉴, 니' -> '여, 요, 유, 이' and
    // word-initial '랴, 려, 례, 료, 류, 리' -> '야, 여, 예, 요, 유, 이'.
    if ((initial === INITIAL_NIEUN || initial === INITIAL_RIEUL)
        && VOWELS_DROPPING_INITIAL.includes(vowel)) {
        return withInitial('ㅇ', jamo);
    }
    // Word-initial '라, 래, 로, 뢰, 루, 르' -> '나, 내, 노, 뇌, 누, 느'.
    if (initial === INITIAL_RIEUL && VOWELS_RIEUL_TO_NIEUN.includes(vowel)) {
        return withInitial('ㄴ', jamo);
    }
    return char;
}

/**
 * Converts the Hanja in `text` to their Hangul readings.
 *
 * @param {string} text - Input text; non-Hanja characters are kept as they are.
 * @param {'SUBSTITUTION'|'PARENTHESIS_HANGUL'|'PARENTHESIS_HANJA'|Function} mode
 *   - 'SUBSTITUTION': replace every Hanja with its reading.
 *   - 'PARENTHESIS_HANGUL': emit each Hanja run as `hanja(hangul)`.
 *   - 'PARENTHESIS_HANJA': emit each Hanja run as `hangul(hanja)`.
 *   - function `(hanja, hangul) => string`: called once per Hanja run; its
 *     return value replaces that run.
 * @returns {string} The converted text; `text` unchanged for any other mode.
 */
function translate(text, mode) {
    if (mode === 'SUBSTITUTION') {
        let result = '';
        for (const char of text) {
            result += isHanja(char) ? dueum(hanjaTable[char], result) : char;
        }
        return result;
    }
    if (typeof mode === 'function') {
        let result = '';
        for (const segment of split(text)) {
            // Destructuring iterates by code point, so a run that starts with
            // a supplementary-plane Hanja is classified by the whole
            // character rather than by its leading surrogate.
            const [firstChar] = segment;
            result += isHanja(firstChar)
                ? mode(segment, translate(segment, 'SUBSTITUTION'))
                : segment;
        }
        return result;
    }
    if (mode === 'PARENTHESIS_HANGUL') {
        return translate(text, (hanja, hangul) => `${hanja}(${hangul})`);
    }
    if (mode === 'PARENTHESIS_HANJA') {
        return translate(text, (hanja, hangul) => `${hangul}(${hanja})`);
    }
    return text;
}
exports.translate = translate;