japanese-string-utils
Version:
Utilities that convert Japanese strings to other forms, such as Hiragana, Katakana, full-width, half-width, numeric, and others.
7 lines • 9.17 kB
JavaScript
/*!
* japanese-string-utils
* https://github.com/yomotsu/japanese-string-utils
* (c) 2018 @yomotsu
* Released under the MIT License.
*/
(function(global,factory){typeof exports==="object"&&typeof module!=="undefined"?factory(exports):typeof define==="function"&&define.amd?define(["exports"],factory):(global=typeof globalThis!=="undefined"?globalThis:global||self,factory(global.japaneseStringUtils={}))})(this,(function(exports){"use strict";function toAscii(value){const charArray=[];for(let i=value.length-1;0<=i;i--){const charCode=charArray[i]=value.charCodeAt(i);switch(true){case charCode<=65374&&65281<=charCode:charArray[i]-=65248;break;case charCode===12288:charArray[i]=32;break;case charCode===12540:charArray[i]=45;break}}return String.fromCharCode.apply(null,charArray)}function toFullwidth(value){const charArray=[];for(let i=value.length-1;0<=i;i--){const charCode=charArray[i]=value.charCodeAt(i);switch(true){case charCode<=126&&33<=charCode:charArray[i]+=65248;break;case charCode===32:charArray[i]=12288;break}}return String.fromCharCode.apply(null,charArray)}const DAKUTEN=12443;const HAN_DAKUTEN=12444;function toNFC(value){const charArray=[];for(let i=0;i<value.length;i++){const charCode=value.charCodeAt(i);switch(true){case 12363<=charCode&&charCode<=12386&&charCode%2===1:case 12459<=charCode&&charCode<=12482&&charCode%2===1:case 12388<=charCode&&charCode<=12393&&charCode%2===0:case 12484<=charCode&&charCode<=12489&&charCode%2===0:{const nextChar=value.charCodeAt(i+1);charArray.push(charCode+(nextChar===DAKUTEN?1:0));if(charArray[charArray.length-1]!==charCode)i++;break}case 12399<=charCode&&charCode<=12415&&charCode%3===0:case 12495<=charCode&&charCode<=12509&&charCode%3===0:{const nextChar=value.charCodeAt(i+1);charArray.push(charCode+(nextChar===DAKUTEN?1:nextChar===HAN_DAKUTEN?2:0));if(charArray[charArray.length-1]!==charCode)i++;break}case 12358===charCode||12454===charCode:{const nextChar=value.charCodeAt(i+1);charArray.push(charCode+(nextChar===DAKUTEN?78:0));if(charArray[charArray.length-1]!=charCode)i++;break}default:charArray.push(charCode);break}}return 
String.fromCharCode.apply(null,charArray)}const fullwidthKanaMap={65382:12530,65383:12449,65384:12451,65385:12453,65386:12455,65387:12457,65388:12515,65389:12517,65390:12519,65391:12483,65392:12540,65393:12450,65394:12452,65395:12454,65396:12456,65397:12458,65398:12459,65399:12461,65400:12463,65401:12465,65402:12467,65403:12469,65404:12471,65405:12473,65406:12475,65407:12477,65408:12479,65409:12481,65410:12484,65411:12486,65412:12488,65413:12490,65414:12491,65415:12492,65416:12493,65417:12494,65418:12495,65419:12498,65420:12501,65421:12504,65422:12507,65423:12510,65424:12511,65425:12512,65426:12513,65427:12514,65428:12516,65429:12518,65430:12520,65431:12521,65432:12522,65433:12523,65434:12524,65435:12525,65436:12527,65437:12531,65438:12443,65439:12444};function toFullwidthKana(value){const charArray=[];for(let i=value.length-1;0<=i;i--){const charCode=value.charCodeAt(i);if(fullwidthKanaMap[charCode]){charArray[i]=fullwidthKanaMap[charCode]}else{charArray[i]=charCode}}return toNFC(String.fromCharCode.apply(null,charArray))}const halfwidthKanaMap={12449:65383,12451:65384,12453:65385,12455:65386,12457:65387,12515:65388,12517:65389,12519:65390,12483:65391,12540:65392,12450:65393,12452:65394,12454:65395,12456:65396,12458:65397,12459:65398,12461:65399,12463:65400,12465:65401,12467:65402,12469:65403,12471:65404,12473:65405,12475:65406,12477:65407,12479:65408,12481:65409,12484:65410,12486:65411,12488:65412,12490:65413,12491:65414,12492:65415,12493:65416,12494:65417,12495:65418,12498:65419,12501:65420,12504:65421,12507:65422,12510:65423,12511:65424,12512:65425,12513:65426,12514:65427,12516:65428,12518:65429,12520:65430,12521:65431,12522:65432,12523:65433,12524:65434,12525:65435,12527:65436,12531:65437,12443:65438,12444:65439,12530:65382};function toHalfwidthKana(value){const charArray=[];for(let i=0;i<value.length;i++){const charCode=value.charCodeAt(i);switch(true){case charCode in halfwidthKanaMap:charArray.push(halfwidthKanaMap[charCode]);break;case 
12459<=charCode&&charCode<=12489:charArray.push(halfwidthKanaMap[charCode-1],65438);break;case 12495<=charCode&&charCode<=12509:charArray.push(halfwidthKanaMap[charCode-charCode%3],[65438,65439][charCode%3-1]);break;case 12532===charCode:charArray.push(65395,65438);break;default:charArray.push(charCode);break}}return String.fromCharCode.apply(null,charArray)}function toHiragana(value){const charArray=[];for(let i=value.length-1;0<=i;i--){const charCode=value.charCodeAt(i);charArray[i]=12449<=charCode&&charCode<=12534?charCode-96:charCode}return String.fromCharCode.apply(null,charArray)}function toKatakana(value){const charArray=[];for(let i=value.length-1;0<=i;i--){const charCode=value.charCodeAt(i);if(12353<=charCode&&charCode<=12438){charArray[i]=charCode+96}else{charArray[i]=charCode}}return String.fromCharCode.apply(null,charArray)}function toNumeric(value){const asciiString=toAscii(value).replace(/[^0-9.-]/g,"").replace(/(?!^)-/g,"").replace(/\.+/,".").replace(/^(-)?0+/,"$10").replace(/^(-)?0([1-9]+)/,"$1$2");const contains2MoreDot=/\..*\./.test(asciiString);if(!contains2MoreDot)return asciiString;const array=asciiString.split(".");const intPart=array.shift();const fractPart=array.join("");if(fractPart)return`${intPart}.${fractPart}`;return intPart}const ONE="一";const normalizeMap={0:"〇","0":"〇","零":"〇",1:ONE,"1":ONE,"壱":ONE,"壹":ONE,"弌":ONE,2:"二","2":"二","弐":"二","貳":"二",3:"三","3":"三","参":"三","參":"三",4:"四","4":"四","肆":"四",5:"五","5":"五","伍":"五",6:"六","6":"六","陸":"六",7:"七","7":"七","漆":"七","柒":"七","質":"七",8:"八","8":"八","捌":"八",9:"九","9":"九","玖":"九","拾":"十","廿":"二十","卅":"三十","丗":"三十","卌":"四十","佰":"百","陌":"百","仟":"千","阡":"千","萬":"万",".":".","。":".","・":".","ー":"-","−":"-","+":"+"};const needsNormalizePattern=new RegExp(`[${Object.keys(normalizeMap).join("|")}]`,"g");const basicNumber={"〇":0,"一":1,"二":2,"三":3,"四":4,"五":5,"六":6,"七":7,"八":8,"九":9};const basicDigit={"十":10,"百":100,"千":1e3};const bigDigit={"万":1e4,"億":1e8,"兆":1e12,"京":1e16};const basicNumberPattern=new 
RegExp(`[${Object.keys(basicNumber).join("|")}]`);const basicNumberWithDotPattern=new RegExp(`[${[...Object.keys(basicNumber),"."].join("|")}]`);const basicDigitPattern=new RegExp(`[${Object.keys(basicDigit).join("|")}]`);const bigDigitPattern=new RegExp(`[${Object.keys(bigDigit).join("|")}]`);function toNumericFromKanji(value){let normalizedValue=value.trim();const matched=value.match(needsNormalizePattern);matched&&matched.forEach((char=>{normalizedValue=normalizedValue.replace(char,normalizeMap[char])}));const signMatched=normalizedValue.match(/^([+-])/);const sign=signMatched?signMatched[1]:"";normalizedValue=normalizedValue.replace(new RegExp(`[^${[".",...Object.keys(basicNumber),...Object.keys(basicDigit),...Object.keys(bigDigit)]}]`,"g"),"");if(normalizedValue==="")return"";const chunks=[{letters:[],digit:1}];let currentBigDigit=1;for(let i=normalizedValue.length-1;i>=0;i--){const currentChunk=chunks[chunks.length-1];if(basicNumberWithDotPattern.test(normalizedValue[i])){currentChunk.letters.unshift(normalizedValue[i]);continue}if(basicDigitPattern.test(normalizedValue[i])){const hasLeadNumber=normalizedValue[i-1]&&basicNumberPattern.test(normalizedValue[i-1]);const leadNumber=hasLeadNumber?normalizedValue[i-1]:ONE;chunks.push({letters:[leadNumber],digit:basicDigit[normalizedValue[i]]*currentBigDigit});if(hasLeadNumber)i--;continue}if(bigDigitPattern.test(normalizedValue[i])){currentBigDigit=bigDigit[normalizedValue[i]];chunks.push({letters:[],digit:currentBigDigit});continue}}const numbers=chunks.reduce(((acc,current)=>{const letters=current.letters.join("")||"0";const numbers=+toNumeric(letters.split("").map((char=>basicNumber[char]!==undefined?basicNumber[char]:char)).join(""));return acc+numbers*current.digit}),0);return`${sign}${numbers}`}toNumericFromKanji.validLetters=[...new Set([",",...Object.entries(normalizeMap).flat(),...Object.keys(basicNumber),...Object.keys(basicDigit),...Object.keys(bigDigit)])];function addCommas(numericString){const 
rgx=/(\d+)(\d{3})/;const x=String(numericString).split(".");let integerPart=x[0];const fractionalPart=x.length>1?"."+x[1]:"";while(rgx.test(integerPart)){integerPart=integerPart.replace(rgx,"$1"+","+"$2")}return integerPart+fractionalPart}const NORMALIZED="‐";const HYPHEN_LIKE_PATTERN=new RegExp(["-","﹣","-","","‐","‑","‒","–","—","―","⁃","−","─","━","ー","ー"].join("|"),"g");function normalizeHyphens(value,replacement=NORMALIZED){return value.replace(HYPHEN_LIKE_PATTERN,replacement)}exports.addCommas=addCommas;exports.normalizeHyphens=normalizeHyphens;exports.toAscii=toAscii;exports.toFullwidth=toFullwidth;exports.toFullwidthKana=toFullwidthKana;exports.toHalfwidthKana=toHalfwidthKana;exports.toHiragana=toHiragana;exports.toKatakana=toKatakana;exports.toNFC=toNFC;exports.toNumeric=toNumeric;exports.toNumericFromKanji=toNumericFromKanji}));