pinyin-utils
Version:
Utilities to transform Pinyin syllables
119 lines (118 loc) • 3.81 kB
JavaScript
// Set the `__esModule` marker on the CommonJS exports object (presumably
// emitted by an ES-module-to-CommonJS compiler), then pre-declare every
// exported name as `undefined`. The real values are assigned further down,
// right after each definition.
Object.defineProperty(exports, "__esModule", { value: true });
exports.numberToMark = exports.markToNumber = exports.removeTone = exports.getToneNumber = exports.toneMarks = exports.codepointToUnicode = void 0;
/**
 * Create a unicode character from the codepoint of a Chinese character.
 *
 * String input is always read as hexadecimal. An optional `U+` or `0x`
 * prefix is accepted in either letter case, and surrounding whitespace is
 * ignored. Numeric input is used as the code point directly.
 *
 * @param {number|string} codepoint codepoint of Chinese character as number or string type
 * @returns {string} the character at that code point
 * @throws {RangeError} if the value does not resolve to a valid code point
 *   (raised by `String.fromCodePoint`, e.g. for a string with no hex digits)
 * @example
 * ```
 * codepointToUnicode(0x6211) // 我
 * codepointToUnicode('0x6211') // 我
 * codepointToUnicode('0X6211') // 我
 * codepointToUnicode('U+6211') // 我
 * codepointToUnicode('u+6211') // 我
 * codepointToUnicode('6211') // 我
 * ```
 */
const codepointToUnicode = (codepoint) => {
    if (typeof codepoint !== 'string') {
        return String.fromCodePoint(codepoint);
    }
    // Strip the prefixes case-insensitively. A case-sensitive check would turn
    // '0X6211' into '0x0X6211', which parses to 0 and silently yields U+0000.
    const hexDigits = codepoint
        .trim()
        .replace(/^U\+/i, '')
        .replace(/^0x/i, '');
    // Explicit radix: the digits are hexadecimal regardless of their content.
    return String.fromCodePoint(Number.parseInt(hexDigits, 16));
};
exports.codepointToUnicode = codepointToUnicode;
/**
 * Combining diacritics for the four marked tones: ` ̄` ` ́` ` ̌` ` ̀`
 * (U+0304, U+0301, U+030C, U+0300).
 *
 * The array is in tone order, so the mark for tone `n` sits at index `n - 1`.
 * There is no entry for the fifth tone.
 */
exports.toneMarks = ['\u0304', '\u0301', '\u030c', '\u0300'];
/**
 * Determines which tone a Pinyin syllable carries.
 *
 * A digit written directly after a letter takes precedence and is returned
 * as-is. Otherwise the syllable is decomposed and searched for one of the
 * combining marks in `toneMarks`; the first mark found (in `toneMarks` order)
 * decides the tone. Syllables with neither are reported as tone 5.
 *
 * @param text Pinyin syllable to get the tone number from
 * @returns the tone number
 * @example
 * ```
 * getToneNumber('shì') // 4
 * getToneNumber('shi4') // 4
 * getToneNumber('shi') // 5
 * ```
 */
const getToneNumber = (text) => {
    // Numbered notation: a digit immediately following a letter
    const digitMatch = text.match(/[a-zü](\d)/i);
    if (digitMatch !== null) {
        return Number(digitMatch[1]);
    }
    // Marked notation: decompose once so the combining marks become separate characters
    const decomposed = text.normalize('NFD');
    const markIndex = exports.toneMarks.findIndex((mark) => decomposed.includes(mark));
    // No mark at all means the fifth tone
    return markIndex === -1 ? 5 : markIndex + 1;
};
exports.getToneNumber = getToneNumber;
/**
 * Strips the tone from a Pinyin syllable, whether it is written as a tone
 * mark or as a trailing tone number (1-5).
 *
 * The text is decomposed so the combining tone marks can be deleted, then
 * recomposed, so letters such as `ü` come back as a single character.
 *
 * @param text Pinyin syllable to remove the tone mark/number from
 * @returns the syllable without tone mark or tone number
 * @example
 * ```
 * removeTone('wǒ') // wo
 * removeTone('wo3') // wo
 * removeTone('lǜ') // lü
 * ```
 */
const removeTone = (text) => {
    const decomposed = text.normalize('NFD');
    const withoutMarks = decomposed.replace(/\u0304|\u0301|\u030c|\u0300/g, '');
    const recomposed = withoutMarks.normalize('NFC');
    // Drop a tone digit that directly follows a word character or ü
    return recomposed.replace(/(\w|ü)[1-5]/gi, '$1');
};
exports.removeTone = removeTone;
/**
 * Converts Pinyin written with tone marks into tone-number notation.
 *
 * Each syllable has its tone stripped and the tone number appended. Empty or
 * whitespace-only strings are returned unchanged.
 *
 * @param data a single syllable, or an array of syllables
 * @param fithTone when true (the default) the number 5 is appended to
 *   syllables of the fifth tone; when false such syllables get no number
 * @returns the converted syllable, or an array of converted syllables when
 *   `data` is an array
 * @example
 * ```
 * markToNumber('wǒ') // wo3
 * markToNumber('ma') // ma5
 * markToNumber('ma', false) // ma
 * markToNumber(['nǐ', 'hǎo']) // ['ni3', 'hao3']
 * ```
 */
function markToNumber(data, fithTone = true) {
    const convert = (syllable) => {
        // Nothing to convert in blank input
        if (syllable.trim().length === 0) {
            return syllable;
        }
        const base = exports.removeTone(syllable);
        const tone = exports.getToneNumber(syllable);
        const omitNumber = !fithTone && tone === 5;
        return omitNumber ? base : base + tone;
    };
    return Array.isArray(data)
        ? data.map((syllable) => convert(syllable))
        : convert(data);
}
exports.markToNumber = markToNumber;
/**
 * Converts Pinyin written with tone numbers into tone-mark notation.
 *
 * The letter that receives the mark is chosen as follows, ignoring case so
 * that capitalised syllables such as `Ai4` or `Ou1` are handled too:
 *  1. a syllable consisting only of `m` or `n` carries the mark itself,
 *  2. otherwise `e`, then `a`, takes the mark if present,
 *  3. otherwise the `o` of `ou`,
 *  4. otherwise the last vowel.
 *
 * Syllables whose tone has no mark (tone 5, or a digit outside 1-4 such as
 * `shi9`) are returned with any tone stripped but without a mark. Empty or
 * whitespace-only strings are returned unchanged.
 *
 * @param data a single syllable, or an array of syllables
 * @returns the converted syllable, or an array of converted syllables when
 *   `data` is an array
 * @example
 * ```
 * numberToMark('wo3') // wǒ
 * numberToMark('Ai4') // Ài
 * numberToMark('ma5') // ma
 * numberToMark(['ni3', 'hao3']) // ['nǐ', 'hǎo']
 * ```
 */
function numberToMark(data) {
    // Index of the letter that should carry the tone mark, or -1 if there is none
    const findMarkIndex = (syllable) => {
        if (/^[mn]$/i.test(syllable)) {
            return 0;
        }
        for (const pattern of [/e/i, /a/i]) {
            const index = syllable.search(pattern);
            if (index !== -1) {
                return index;
            }
        }
        if (/ou/i.test(syllable)) {
            return syllable.search(/o/i);
        }
        const vowels = syllable.match(/[aeiouü]/gi);
        return vowels ? syllable.indexOf(vowels[vowels.length - 1]) : -1;
    };
    const process = (text) => {
        if (text.trim().length === 0) {
            return text;
        }
        const tone = exports.getToneNumber(text);
        const bare = exports.removeTone(text);
        // Only tones 1-4 have an entry; tone 5 and stray digits (0, 6-9) must not
        // end up concatenating `undefined` into the result.
        const mark = exports.toneMarks[tone - 1];
        if (mark === undefined) {
            return bare;
        }
        const index = findMarkIndex(bare);
        if (index === -1) {
            return bare;
        }
        // Insert the combining mark right after its letter, then compose
        const marked = bare.slice(0, index + 1) + mark + bare.slice(index + 1);
        return marked.normalize('NFC');
    };
    if (Array.isArray(data)) {
        return data.map((text) => process(text));
    }
    return process(data);
}
exports.numberToMark = numberToMark;
;