UNPKG

pinyin-utils

Version:

Utilities to transform Pinyin syllables

119 lines (118 loc) 3.81 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.numberToMark = exports.markToNumber = exports.removeTone = exports.getToneNumber = exports.toneMarks = exports.codepointToUnicode = void 0; /** * Create a unicode character from the codepoint of a Chinese character * @param codepoint codepoint of Chinese character as number or string type * @example * ``` * codepointToUnicode(0x6211) // 我 * codepointToUnicode('0x6211') // 我 * codepointToUnicode('U+6211') // 我 * codepointToUnicode('6211') // 我 * ``` */ const codepointToUnicode = (codepoint) => { if (typeof codepoint === 'string') { let codepointStr = codepoint.replace('U+', ''); if (!/^0x/.test(codepointStr)) { codepointStr = '0x' + codepointStr; } return String.fromCodePoint(parseInt(codepointStr)); } return String.fromCodePoint(codepoint); }; exports.codepointToUnicode = codepointToUnicode; /** * Four tones: ` ̄` ` ́` ` ̌` ` ̀` */ exports.toneMarks = ['\u0304', '\u0301', '\u030c', '\u0300']; /** * Returns the tone number of a Pinyin syllable * @param text Pinyin syllable to get the tone number from * @example * ``` * getToneNumber('shì') // 4 * getToneNumber('shi4') // 4 * ``` */ const getToneNumber = (text) => { // Check for tone number const matches = text.match(/[a-zü](\d)/i); if (matches) return +matches[1]; // Check for tone mark for (let i = 0; i < exports.toneMarks.length; i++) { if (text.normalize('NFD').match(exports.toneMarks[i])) return i + 1; } // Return 5th tone as default return 5; }; exports.getToneNumber = getToneNumber; /** * Removes the tone mark/number from a Pinyin syllable * @param text Pinyin syllable to remove the tone mark/number from * @example * ``` * removeTone('wǒ') // wo * removeTone('wo3') // wo * ``` */ const removeTone = (text) => { text = text.normalize('NFD').replace(/\u0304|\u0301|\u030c|\u0300/g, ''); return text.normalize('NFC').replace(/(\w|ü)[1-5]/gi, '$1'); }; exports.removeTone = removeTone; function markToNumber(data, fithTone = true) { const process = (text) => { if (text.trim().length === 0) return text; if (fithTone) { return exports.removeTone(text) + exports.getToneNumber(text); } else { const tone = exports.getToneNumber(text); return tone === 5 ? exports.removeTone(text) : exports.removeTone(text) + tone; } }; if (Array.isArray(data)) { return data.map(process); } else { return process(data); } } exports.markToNumber = markToNumber; function numberToMark(data) { const process = (text) => { if (text.trim().length === 0) return text; const tone = exports.getToneNumber(text); text = exports.removeTone(text); if (tone !== 5) { if (text === 'm' || text === 'n' || text === 'M' || text === 'N') { return (text + exports.toneMarks[tone - 1]).normalize('NFC'); } const matchedVovels = text.match(/[aeiouü]/gi); if (matchedVovels) { let vovel = matchedVovels[matchedVovels.length - 1]; if (text.match('ou')) vovel = 'o'; if (text.match('a')) vovel = 'a'; if (text.match('e')) vovel = 'e'; return text.replace(vovel, vovel + exports.toneMarks[tone - 1]).normalize('NFC'); } } return text; }; if (Array.isArray(data)) { return data.map(process); } else { return process(data); } } exports.numberToMark = numberToMark;