@kokr/text
Version:
A utility for handling grammatical particles (josa, 조사) in Korean sentences; it selects the appropriate form of particles such as 은/는 and 이/가. / 한국어 문장의 조사 처리를 도와주는 유틸리티입니다. 은/는/이/가 등의 조사를 적절하게 처리합니다.
111 lines (110 loc) • 2.24 kB
JavaScript
;
// Tag the CommonJS exports object with a non-enumerable `__esModule: true`
// property (the conventional marker emitted when ES-module syntax is
// transpiled to CommonJS).
Object.defineProperty(exports, "__esModule", { value: true });
// Initialise the `jongseong` export slot to undefined; it is assigned the
// real function after that function has been defined further down the file.
exports.jongseong = void 0;
// A non-zero digit followed by a run of zeros at the end of the text;
// capture group 1 is the run of zeros.
const RE_DIGIT_ZEROS = /[1-9](0+)$/;
// The last two characters of the text when both are ASCII letters (any case).
const RE_ENG_LAST_TWO = /[a-z]{2}$/i;

// Final-consonant (jongseong) indices, on the same scale as
// `(syllableCode - 0xAC00) % 28`; 0 means "no final consonant".
const ㄱ = 1;
const ㄴ = 4;
const ㄹ = 8;
const ㅁ = 16;
const ㅂ = 17;
const ㅅ = 19;
const ㅇ = 21;

// Final consonant of the unit word a round number ends with, indexed by
// (count of trailing zeros - 1). After 10^1..10^3 the units repeat in groups
// of four powers of ten, each group sharing the ending of its base unit.
const digitZerosMap = [
  ㅂ, // 10^1  (sip)
  ㄱ, // 10^2  (baek)
  ㄴ, // 10^3  (cheon)
  ...Array(4).fill(ㄴ), // 10^4  .. 10^7  (man)
  ...Array(4).fill(ㄱ), // 10^8  .. 10^11 (eok)
  ...Array(4).fill(0), //  10^12 .. 10^15 (jo)
  ...Array(4).fill(ㅇ), // 10^16 .. 10^19 (gyeong)
  ...Array(4).fill(0), //  10^20 .. 10^23 (hae)
];

// Final consonant of each digit's Sino-Korean reading, indexed by digit value.
const digitMap = [
  ㅇ, // 0 (yeong)
  ㄹ, // 1 (il)
  0, //  2 (i)
  ㅁ, // 3 (sam)
  0, //  4 (sa)
  0, //  5 (o)
  ㄱ, // 6 (yuk)
  ㄹ, // 7 (chil)
  ㄹ, // 8 (pal)
  0, //  9 (gu)
];

// Two-letter word endings that are looked up first; 0 means the ending is
// treated as having no final consonant.
const engSuffix2Map = { nd: 0, ne: ㄴ, le: ㄹ, ng: ㅇ };

// Fallback keyed by the very last letter of a word that ends in two letters.
const engSuffixMap = {
  b: ㅂ, c: ㄱ, d: ㅅ,
  k: ㄱ, l: ㄹ, m: ㅁ,
  n: ㄴ, p: ㅂ, t: ㅅ,
};

// Used when the text ends in a single ASCII letter only (presumably the
// final consonant of the spelled-out letter name).
const engCharMap = { l: ㄹ, m: ㅁ, n: ㄴ, r: ㄹ };
/**
 * Returns the final-consonant (jongseong) index for the end of `word`, as
 * needed to choose between particle forms.
 *
 * Trailing parenthesised groups are ignored ("ABC(D)" is read as "ABC",
 * "ABC(D)(E)" likewise), and trailing characters that are not a precomposed
 * Hangul syllable, an ASCII digit or an ASCII letter are dropped one at a
 * time until such a character is found.
 *
 * @internal
 * @param {string} word Text whose ending is inspected.
 * @returns {number} Jongseong index on the `(code - 0xAC00) % 28` scale;
 *   0 when there is no final consonant or nothing usable is found
 *   (including when `word` is empty or consists only of a "(…)" group).
 */
function jongseong(word) {
  let w = word;
  while (w.length) {
    // strip paren ABC(D) => ABC
    const stripped = w.replace(/\([^)]*\)$/, "");
    if (stripped !== w) {
      // Re-enter the loop with the shortened text: it may now be empty
      // (input such as "(D)"), which must not be indexed, or it may end in
      // another parenthesised group that also has to go.
      w = stripped;
      continue;
    }
    const last = w[w.length - 1];
    const lastCharCode = last.charCodeAt(0);
    if (lastCharCode >= 44032 && lastCharCode <= 55203) { // U+AC00..U+D7A3
      // Precomposed syllables are laid out with 28 final-consonant slots
      // per initial/vowel pair, so the remainder is the jongseong index.
      return (lastCharCode - 44032) % 28;
    }
    // digit
    if (lastCharCode >= 48 && lastCharCode <= 57) { // 0-9
      // A round number ("10", "3000") is looked up by how many zeros
      // follow its last non-zero digit; runs longer than the table give 0.
      const zerosMatch = RE_DIGIT_ZEROS.exec(w);
      if (zerosMatch) {
        return digitZerosMap[zerosMatch[1].length - 1] ?? 0;
      }
      return digitMap[lastCharCode - 48];
    }
    // english
    if (
      (lastCharCode >= 65 && lastCharCode <= 90) ||
      (lastCharCode >= 97 && lastCharCode <= 122)
    ) {
      const match = RE_ENG_LAST_TWO.exec(w);
      if (match) {
        // Two-letter endings take precedence (their value may legitimately
        // be 0), then fall back to the last letter alone.
        const suffix2 = match[0].toLowerCase();
        const code = engSuffix2Map[suffix2];
        if (typeof code === "number") {
          return code;
        }
        return engSuffixMap[suffix2[1]] ?? 0;
      }
      // Only a single trailing letter is available.
      return engCharMap[last.toLowerCase()] ?? 0;
    }
    // Anything else (punctuation, whitespace, ...) is skipped.
    w = w.slice(0, w.length - 1);
  }
  return 0;
}
// Publish the function on the CommonJS exports object.
exports.jongseong = jongseong;