pinyin-tone
Version:
A library for converting pinyin tone numbers into pinyin tone marks.
306 lines (245 loc) • 9.35 kB
JavaScript
'use strict';
// Scheme for the Chinese Phonetic Alphabet (汉语拼音方案)
// http://www.moe.gov.cn/jyb_sjzl/ziliao/A19/195802/t19580201_186000.html
// http://www.moe.gov.cn/ewebeditor/uploadfile/2015/03/02/20150302165814246.pdf
//
// Matches one complete token of numbered pinyin (anchored at both ends).
// Capture groups:
//   1: the whole token
//   2: optional initial (声母)
//   3: final (韵母); the ü-row finals are typed as "v", "ve", "van", "vn"
//   4: optional erhua/tone suffix: "r", one digit 0-5, or "r" followed by a digit 0-5
// Groups 2-4 are only set when the token matches the "[initial] final [suffix]"
// alternative. The other alternatives match the token as a whole: "er" with an
// optional digit, "e" followed by one character that is not 0-5 (e.g. "e^"),
// "ng", and a bare initial.
const regexp_Pinyin = /^(er[0-5]?|e[^0-5]|([zcs]h?|[bpmfdtnlgkhjqxywr])?(i[ao]ng|u[ae]ng|[aeo]ng|ia[on]|iou|ing|u[ae][in]|van|a[ion]?|ou?|e[in]?|i[aeun]?|u[aoein]?|v[en]?)(r[0-5]|[r0-5])?|ng|[zcs]h?|[bpmfdtnlgkhjqxywr])$/;
// Table of finals (韵母表). Each key is a final as typed; each value lists its
// five written forms: index 0 is the unmarked form (neutral tone / no tone
// number) and indices 1-4 carry the mark for tones 1-4.
// The keys "v", "ve", "van" and "vn" are ASCII spellings whose values are the
// same forms as the "ü", "üe", "üan" and "ün" entries.
const _韵母表 = {
a: ['a', 'ā', 'á', 'ǎ', 'à'], // a
o: ['o', 'ō', 'ó', 'ǒ', 'ò'], // o
e: ['e', 'ē', 'é', 'ě', 'è'], // e
ai: ['ai', 'āi', 'ái', 'ǎi', 'ài'],
ei: ['ei', 'ēi', 'éi', 'ěi', 'èi'],
ao: ['ao', 'āo', 'áo', 'ǎo', 'ào'],
ou: ['ou', 'ōu', 'óu', 'ǒu', 'òu'],
an: ['an', 'ān', 'án', 'ǎn', 'àn'],
en: ['en', 'ēn', 'én', 'ěn', 'èn'],
ang: ['ang', 'āng', 'áng', 'ǎng', 'àng'],
eng: ['eng', 'ēng', 'éng', 'ěng', 'èng'],
ong: ['ong', 'ōng', 'óng', 'ǒng', 'òng'],
i: ['i', 'ī', 'í', 'ǐ', 'ì'], // i
ia: ['ia', 'iā', 'iá', 'iǎ', 'ià'],
ie: ['ie', 'iē', 'ié', 'iě', 'iè'],
iao: ['iao', 'iāo', 'iáo', 'iǎo', 'iào'],
iou: ['iou', 'iōu', 'ióu', 'iǒu', 'iòu'],
iu: ['iu', 'iū', 'iú', 'iǔ', 'iù'],
ian: ['ian', 'iān', 'ián', 'iǎn', 'iàn'],
in: ['in', 'īn', 'ín', 'ǐn', 'ìn'],
iang: ['iang', 'iāng', 'iáng', 'iǎng', 'iàng'],
ing: ['ing', 'īng', 'íng', 'ǐng', 'ìng'],
iong: ['iong', 'iōng', 'ióng', 'iǒng', 'iòng'],
ui: ['ui', 'uī', 'uí', 'uǐ', 'uì'], //ui
u: ['u', 'ū', 'ú', 'ǔ', 'ù'], // u
ua: ['ua', 'uā', 'uá', 'uǎ', 'uà'],
uo: ['uo', 'uō', 'uó', 'uǒ', 'uò'],
uai: ['uai', 'uāi', 'uái', 'uǎi', 'uài'],
uei: ['uei', 'uēi', 'uéi', 'uěi', 'uèi'],
uan: ['uan', 'uān', 'uán', 'uǎn', 'uàn'],
uen: ['uen', 'uēn', 'uén', 'uěn', 'uèn'],
un: ['un', 'ūn', 'ún', 'ǔn', 'ùn'],
uang: ['uang', 'uāng', 'uáng', 'uǎng', 'uàng'],
ueng: ['ueng', 'uēng', 'uéng', 'uěng', 'uèng'],
ü: ['ü', 'ǖ', 'ǘ', 'ǚ', 'ǜ'], // ü
v: ['ü', 'ǖ', 'ǘ', 'ǚ', 'ǜ'], // ü
üe: ['üe', 'üē', 'üé', 'üě', 'üè'],
ve: ['üe', 'üē', 'üé', 'üě', 'üè'],
ue: ['ue', 'uē', 'ué', 'uě', 'uè'],
üan: ['üan', 'üān', 'üán', 'üǎn', 'üàn'],
van: ['üan', 'üān', 'üán', 'üǎn', 'üàn'],
ün: ['ün', 'ǖn', 'ǘn', 'ǚn', 'ǜn'],
vn: ['ün', 'ǖn', 'ǘn', 'ǚn', 'ǜn'],
};
// Letter appended after a converted final when the tone suffix contains "r" (erhua, 儿化音).
const r = "r";
// Written forms of the stand-alone syllable "er", keyed by its numbered spelling.
// 0, 5 and "no digit" all mean the unmarked (neutral) form.
const ER_TONE_FORMS = new Map([
  ["er", "er"],
  ["er0", "er"],
  ["er5", "er"],
  ["er1", "ēr"],
  ["er2", "ér"],
  ["er3", "ěr"],
  ["er4", "èr"],
]);

// Initials after which the ü-row finals are written without the two dots.
const INITIALS_DROPPING_UMLAUT = new Set(["j", "q", "x", "y"]);

// ü-row finals (typed with "v") -> key of the dot-less entry in the finals table.
const UMLAUT_FINAL_WITHOUT_DOTS = new Map([
  ["v", "u"],
  ["ve", "ue"],
  ["van", "uan"],
  ["vn", "un"],
]);

// Finals that are written in contracted form whenever an initial precedes them.
const CONTRACTED_FINALS = new Map([
  ["uei", "ui"],
  ["uen", "un"],
  ["iou", "iu"],
]);

/**
 * Convert a single token (no spaces) from "letters + tone number" to tone marks.
 * Tokens that are not recognised as pinyin are returned unchanged.
 * @param {string} token like "ma3", "huar1" or "jvn1"
 * @return {string} like "mǎ", "huār" or "jūn"
 */
function convert_pinyin_token(token) {
  const match = regexp_Pinyin.exec(token);
  if (match === null) {
    return token;
  }
  // Group 1 is the whole token (the pattern is anchored), so it is skipped here.
  const [, , initial, final, suffix] = match;

  // Tokens handled as a whole.
  if (ER_TONE_FORMS.has(token)) {
    return ER_TONE_FORMS.get(token);
  }
  if (token === "e^") {
    return "ê";
  }
  // No final captured: "ng", a bare initial, or "e" plus one other non-tone
  // character. These are shown exactly as typed.
  if (typeof final === "undefined") {
    return token;
  }

  // Pick the row of the finals table to use. A final without an initial is
  // shown as typed; after an initial some finals change their spelling.
  let table_key = final;
  if (typeof initial !== "undefined") {
    if (CONTRACTED_FINALS.has(final)) {
      table_key = CONTRACTED_FINALS.get(final);
    } else if (INITIALS_DROPPING_UMLAUT.has(initial) && UMLAUT_FINAL_WITHOUT_DOTS.has(final)) {
      table_key = UMLAUT_FINAL_WITHOUT_DOTS.get(final);
    }
  }

  const tone_forms = _韵母表[table_key];
  if (typeof tone_forms === "undefined") {
    // Unknown final: leave the token alone rather than throwing.
    return token;
  }

  // `alter` treats a missing suffix like the neutral tone, and handles every
  // suffix the pattern can capture (including "r0") the same way with or
  // without an initial.
  const prefix = typeof initial === "undefined" ? "" : initial;
  return prefix + alter(suffix, tone_forms);
}

/**
 * Convert "letter + number" to "Pinyin tone".
 *
 * The input is split on single spaces and each token is converted on its own;
 * tokens that are not recognised as pinyin are kept as they are, and the
 * results are joined with single spaces again.
 * @param {string} args like "ma1 ma2 ma3 ma4 ma"
 * @return {string} like "mā má mǎ mà ma"; a non-string argument is returned unchanged
 */
function pinyin_v1(args) {
  // args must be a string, otherwise it is handed back untouched
  if (typeof args !== 'string') {
    return args;
  }
  return args.split(' ').map(convert_pinyin_token).join(' ');
}
// Every recognised erhua/tone suffix, mapped to
// [index into the five written forms, whether the erhua "r" is appended].
// "", "0" and "5" all select the unmarked form.
const TONE_SUFFIX_RULES = new Map([
  ["", [0, false]],
  ["0", [0, false]],
  ["5", [0, false]],
  ["r", [0, true]],
  ["r0", [0, true]],
  ["0r", [0, true]],
  ["r5", [0, true]],
  ["5r", [0, true]],
  ["1", [1, false]],
  ["r1", [1, true]],
  ["1r", [1, true]],
  ["2", [2, false]],
  ["r2", [2, true]],
  ["2r", [2, true]],
  ["3", [3, false]],
  ["r3", [3, true]],
  ["3r", [3, true]],
  ["4", [4, false]],
  ["r4", [4, true]],
  ["4r", [4, true]],
]);

/**
 * Pick the written form of a final for a given erhua/tone suffix.
 *
 * @param {string} _儿化音和声调 suffix such as "", "3", "r" or "r3"; anything
 *   that is not a recognised suffix (including undefined) selects the unmarked form
 * @param {string[]} _韵母 the five written forms of the final: unmarked, then tones 1-4
 * @return {string} the selected form, followed by "r" when the suffix asks for erhua
 */
function alter(_儿化音和声调, _韵母) {
  const rule = TONE_SUFFIX_RULES.get(_儿化音和声调);
  if (typeof rule === "undefined") {
    // Unknown suffix: fall back to the unmarked form without erhua.
    return _韵母[0];
  }
  const [tone_index, with_erhua] = rule;
  if (with_erhua) {
    return _韵母[tone_index] + r;
  }
  return _韵母[tone_index];
}
// Expose the converter as this module's default export.
export default pinyin_v1;