UNPKG

hangul-tools

Version:

A Hangul library for various Hangul/Korean operations including reading numbers, adding josa, and converting between different keyboards

301 lines (276 loc) 7.97 kB
"use strict"; (function(){ var HANGUL_FIRST = '가', HANGUL_LAST = '힣', HANGUL_FIRST_CODE = HANGUL_FIRST.charCodeAt(0), HANGUL_LAST_CODE = HANGUL_LAST.charCodeAt(0); var CHOSEONG = "ㄱㄲㄴㄷㄸㄹㅁㅂㅃㅅㅆㅇㅈㅉㅊㅋㅌㅍㅎ", JUNGSEONG = "ㅏㅐㅑㅒㅓㅔㅕㅖㅗㅘㅙㅚㅛㅜㅝㅞㅟㅠㅡㅢㅣ", JONGSEONG = "Xㄱㄲㄳㄴㄵㄶㄷㄹㄺㄻㄼㄽㄾㄿㅀㅁㅂㅄㅅㅆㅇㅈㅊㅋㅌㅍㅎ", CHOSEONG_LEN = CHOSEONG.length, JUNGSEONG_LEN = JUNGSEONG.length, JONGSEONG_LEN = JONGSEONG.length; var HANGUL_NUMBERS = [ "영", "일一壹", "이二", "삼三參", "사四", "오五伍", "육六", "칠七", "팔八", "구九", "십十拾" ]; var padZero = function padZero(n, l){ for(n+=''; n.length<l; n='0'+n); return n; }; // Making a regular expression for Numerals var numRegExp_thousand = null, numRegExp = null, numReadMap = {}; (function(h_nums){ h_nums.forEach(function(s, i){ s.split('').forEach(function(c){ numReadMap[c] = i; }); }); h_nums.십 = h_nums[10]; h_nums.백 = "백百"; h_nums.천 = "천千仟"; h_nums.만 = "만萬"; h_nums.억 = "억億"; h_nums.조 = "조兆"; h_nums.경 = "경京"; var digits = "[1-9" + h_nums.join('') + "]", thousands = "(?:"+digits+"?["+h_nums.천+"]\\s?)?" + "(?:"+digits+"?["+h_nums.백+"]\\s?)?" + "(?:"+digits+"?["+h_nums.십+"]\\s?)?" + digits + "?|[1-9][0-9]{0,3}"; numRegExp_thousand = new RegExp("^" + "(?:("+digits+"?["+h_nums.천+"])\\s?)?" + "(?:("+digits+"?["+h_nums.백+"])\\s?)?" + "(?:("+digits+"?["+h_nums.십+"])\\s?)?" + "("+digits+")?$"); numRegExp = new RegExp("(\\s+|^)" + "((?:"+thousands+")["+h_nums.경+"]\\s?)?" + "((?:"+thousands+")["+h_nums.조+"]\\s?)?" + "((?:"+thousands+")["+h_nums.억+"]\\s?)?" + "((?:"+thousands+")["+h_nums.만+"]\\s?)?" + "("+thousands+")?" +"($|\\s+)", 'g'); })(HANGUL_NUMBERS); var parseNumber_thousands = function(s){ if(!s) return 0; var m = s.match(numRegExp_thousand); if(m == null) return parseInt(s, 10) || 0; var x=0, i; for(i=1; i<=4; i++){ x *= 10; if(m[i]){ if(m[i].length == 1 && i < 4) x++; else if(m[i][0] in numReadMap) x += numReadMap[m[i][0]]; else x += parseInt(m[i][0], 10) || 0; } } return x; }, readNumber_thousands = function(n){ if(n<10) return HANGUL_NUMBERS[n][0]; return [0|n/1000, (0|n/100)%10, (0|n/10)%10, n%10].map(function(n, i){ if(n == 0) return ""; else if(n == 1) return "천백십일"[i]; else if(i == 3) return HANGUL_NUMBERS[n][0]; else return HANGUL_NUMBERS[n][0] + "천백십"[i]; }).join(' '); }; var root = this; var _old_hantools = this.HanTools; var HanTools = { 'CHOSEONG': CHOSEONG, 'JUNGSEONG': JUNGSEONG, 'JONGSEONG': JONGSEONG, 'JONGSEONG_EMPTY': "X", 'isHangul': function(s){ for(var i=0; i<s.length; i++) if(s[i] < HANGUL_FIRST || s[i] > HANGUL_LAST) return false; return true; }, 'toChoseong': function(s){ var r, i, c; if(s.length == 1){ c = s.charCodeAt(0); if(c >= HANGUL_FIRST_CODE && c <= HANGUL_LAST_CODE) return CHOSEONG[0|(c-HANGUL_FIRST_CODE)/(JUNGSEONG_LEN*JONGSEONG_LEN)]; else return s; } r = ""; for(i=0; i<s.length; i++){ c = s.charCodeAt(i); if(c >= HANGUL_FIRST_CODE && c <= HANGUL_LAST_CODE) r += CHOSEONG[0|(c-HANGUL_FIRST_CODE)/(JUNGSEONG_LEN*JONGSEONG_LEN)]; else r += s[i]; } return r; }, 'toJungseong': function(s){ var r, i, c; if(s.length == 1){ c = s.charCodeAt(0); if(c >= HANGUL_FIRST_CODE && c <= HANGUL_LAST_CODE) return JUNGSEONG[0|(c-HANGUL_FIRST_CODE)/JONGSEONG_LEN%JUNGSEONG_LEN]; else return s; } r = ""; for(i=0; i<s.length; i++){ c = s.charCodeAt(i); if(c >= HANGUL_FIRST_CODE && c <= HANGUL_LAST_CODE) r += JUNGSEONG[0|(c-HANGUL_FIRST_CODE)/JONGSEONG_LEN%JUNGSEONG_LEN]; else r += s[i]; } return r; }, 'toJongseong': function(s){ var r, i, c; if(s.length == 1){ c = s.charCodeAt(0); if(c >= HANGUL_FIRST_CODE && c <= HANGUL_LAST_CODE) return JONGSEONG[0|(c-HANGUL_FIRST_CODE)%JONGSEONG_LEN]; else return s; } r = ""; for(i=0; i<s.length; i++){ c = s.charCodeAt(i); if(c >= HANGUL_FIRST_CODE && c <= HANGUL_LAST_CODE) r += JONGSEONG[(c-HANGUL_FIRST_CODE)%JONGSEONG_LEN]; else r += s[i]; } return r; }, 'disintegrate': function(s){ var c = s.charCodeAt(0); if(c >= HANGUL_FIRST_CODE && c <= HANGUL_LAST_CODE){ c -= HANGUL_FIRST_CODE; if(c % JONGSEONG_LEN) return [ CHOSEONG[0|c/(JUNGSEONG_LEN*JONGSEONG_LEN)], JUNGSEONG[0|c/JONGSEONG_LEN%JUNGSEONG_LEN], JONGSEONG[c%JONGSEONG_LEN] ]; else return [ CHOSEONG[0|c/(JUNGSEONG_LEN*JONGSEONG_LEN)], JUNGSEONG[0|c/JONGSEONG_LEN%JUNGSEONG_LEN] ]; }else return s; }, 'compose': function(a){ var x, y, z; if(a.length == 0) return ""; // One component only if(a.length == 1 || !a[1] && !a[2]) return a[0] ? a[0] : ""; // No vowel y = JUNGSEONG.indexOf(a[1]); if(a.length > 3 || y == -1) return a.join(''); // Vowel only if((a.length == 2 || a[2] == null) && a[0] == null) return a[1]; x = CHOSEONG.indexOf(a[0]); if(x == -1) return a.join(''); z = a[2] ? JONGSEONG.indexOf(a[2]) : 0; if(z == -1) return a.join(''); return String.fromCharCode(HANGUL_FIRST_CODE + z + JONGSEONG_LEN*(y + JUNGSEONG_LEN*x)); }, 'dueum': function(s){ if(!s) return ''; var c = s.charCodeAt(0); if(c < HANGUL_FIRST_CODE || c > HANGUL_LAST_CODE) return s; switch(0|(c-HANGUL_FIRST_CODE)/JONGSEONG_LEN){ // 녀, 뇨, 뉴, 니 case 48: case 54: case 59: case 62: c += 5292; break; // 랴, 려, 례, 료, 류, 리 case 107: case 111: case 112: case 117: case 122: case 125: c += 3528; break; // 라, 래, 로, 뢰, 루, 르 case 105: case 106: case 113: case 116: case 118: case 123: c -= 1764; break; } return String.fromCharCode(c) + s.slice(1); }, 'josa': function(s, a){ if(!s) return a[0]; var c = s.charCodeAt(s.length-1); return a[+!((c-HANGUL_FIRST_CODE)%JONGSEONG_LEN)]; }, 'addJosa': function(s, a){ return s + HanTools.josa(s, a); }, 'parseNumber': function(s){ // need to find a better way... return parseInt(HanTools.replaceNumber(s)); }, 'replaceNumber': function(s){ return s.split(/([\[\]\(\)\^\+\-\*\/.,?!])/).map(function(s){ if(!s || !s.trim()) return s; try{ return s.replace(numRegExp, function(x, y, 경, 조, 억, 만, 일, z){ if(!경 && !조 && !억 && !만 && !일) return y+z; if(경){경 = 경.replace(/\s/g,'').slice(0,-1); if(!경) 경 = '1';} if(조){조 = 조.replace(/\s/g,'').slice(0,-1); if(!조) 조 = '1';} if(억){억 = 억.replace(/\s/g,'').slice(0,-1); if(!억) 억 = '1';} if(만){만 = 만.replace(/\s/g,'').slice(0,-1); if(!만) 만 = '1';} 경 = parseNumber_thousands(경); 조 = parseNumber_thousands(조); 억 = parseNumber_thousands(억); 만 = parseNumber_thousands(만); 일 = parseNumber_thousands(일); var num = 경*1e16 + 조*1e12 + 억*1e8 + 만*1e4 + 일; return y+(num+'')+z; }).replace(/(^|\s)영($|\s)/g, function(x, y, z){return y+'0'+z;}); }catch(e){ return s; } }).join(''); }, 'readNumber': function(n){ if(n < 10000) return readNumber_thousands(n); var i=0, a=[]; if(n%10000) a.unshift(readNumber_thousands(n%10000)); for(; n=Math.floor(n/10000); i++){ if(n%10000) a.unshift(readNumber_thousands(n%10000)+"만억조경"[i]) } return a.join(' '); }, 'noConflict': function(){ root.HanTools = _old_hantools; return HanTools; } }; if((typeof exports) != 'undefined'){ HanTools.Keyboard = require("./hantools-keyboard.js")(HanTools); if((typeof module) != 'undefined' && module.exports){ exports = module.exports = HanTools; } exports.HanTools = HanTools; }else{ if('HanTools_Keyboard' in root) HanTools.Keyboard = root.HanTools_Keyboard; root.HanTools = HanTools; } }).call(this);