UNPKG

pinyin-zhuyin

Version:

Library for converting from pinyin to zhuyin

293 lines (292 loc) 7.44 kB
"use strict";
// CommonJS prologue. Guarded so the file also evaluates where no `exports`
// binding exists (ES module or plain script); under CommonJS nothing changes.
if (typeof exports !== "undefined") {
    Object.defineProperty(exports, "__esModule", { value: true });
    exports.pinyinToZhuyin = void 0;
}

/** Pinyin initial (leading consonant) -> zhuyin symbol. */
const initials = {
    b: 'ㄅ',
    p: 'ㄆ',
    m: 'ㄇ',
    f: 'ㄈ',
    d: 'ㄉ',
    t: 'ㄊ',
    n: 'ㄋ',
    l: 'ㄌ',
    g: 'ㄍ',
    k: 'ㄎ',
    h: 'ㄏ',
    j: 'ㄐ',
    q: 'ㄑ',
    x: 'ㄒ',
    zh: 'ㄓ',
    ch: 'ㄔ',
    sh: 'ㄕ',
    r: 'ㄖ',
    z: 'ㄗ',
    c: 'ㄘ',
    s: 'ㄙ',
};

/** Pinyin final (the part written after an initial) -> zhuyin symbols. */
const finals = {
    a: 'ㄚ',
    o: 'ㄛ',
    e: 'ㄜ',
    ai: 'ㄞ',
    ei: 'ㄟ',
    ao: 'ㄠ',
    ou: 'ㄡ',
    an: 'ㄢ',
    ang: 'ㄤ',
    en: 'ㄣ',
    eng: 'ㄥ',
    er: 'ㄦ',
    u: 'ㄨ',
    ua: 'ㄨㄚ',
    uo: 'ㄨㄛ',
    uai: 'ㄨㄞ',
    ui: 'ㄨㄟ',
    uan: 'ㄨㄢ',
    uang: 'ㄨㄤ',
    un: 'ㄨㄣ',
    //This one might not occur.
    ueng: 'ㄨㄥ',
    ong: 'ㄨㄥ',
    i: 'ㄧ',
    ia: 'ㄧㄚ',
    ie: 'ㄧㄝ',
    iao: 'ㄧㄠ',
    iu: 'ㄧㄡ',
    ian: 'ㄧㄢ',
    iang: 'ㄧㄤ',
    in: 'ㄧㄣ',
    ing: 'ㄧㄥ',
    ü: 'ㄩ',
    üe: 'ㄩㄝ',
    ue: 'ㄩㄝ',
    üan: 'ㄩㄢ',
    ün: 'ㄩㄣ',
    iong: 'ㄩㄥ',
};

/** Syllables that are looked up whole rather than as initial + final. */
const individuals = {
    //individual initials
    zhi: 'ㄓ',
    chi: 'ㄔ',
    shi: 'ㄕ',
    ri: 'ㄖ',
    zi: 'ㄗ',
    ci: 'ㄘ',
    si: 'ㄙ',
    //individual finals
    a: 'ㄚ',
    o: 'ㄛ',
    e: 'ㄜ',
    ai: 'ㄞ',
    ei: 'ㄟ',
    ao: 'ㄠ',
    ou: 'ㄡ',
    an: 'ㄢ',
    ang: 'ㄤ',
    en: 'ㄣ',
    eng: 'ㄥ',
    er: 'ㄦ',
    r: 'ㄦ',
    wu: 'ㄨ',
    wa: 'ㄨㄚ',
    wo: 'ㄨㄛ',
    wai: 'ㄨㄞ',
    wei: 'ㄨㄟ',
    wan: 'ㄨㄢ',
    wang: 'ㄨㄤ',
    wen: 'ㄨㄣ',
    weng: 'ㄨㄥ',
    yi: 'ㄧ',
    ya: 'ㄧㄚ',
    ye: 'ㄧㄝ',
    yao: 'ㄧㄠ',
    you: 'ㄧㄡ',
    yan: 'ㄧㄢ',
    yang: 'ㄧㄤ',
    yin: 'ㄧㄣ',
    ying: 'ㄧㄥ',
    yu: 'ㄩ',
    yue: 'ㄩㄝ',
    yuan: 'ㄩㄢ',
    yun: 'ㄩㄣ',
    yong: 'ㄩㄥ',
};

/** Tone-marked vowel -> two-character code: plain vowel followed by tone digit. */
const toneMap = {
    ā: 'a1',
    á: 'a2',
    ǎ: 'a3',
    à: 'a4',
    ē: 'e1',
    é: 'e2',
    ě: 'e3',
    è: 'e4',
    ī: 'i1',
    í: 'i2',
    ǐ: 'i3',
    ì: 'i4',
    ō: 'o1',
    ó: 'o2',
    ǒ: 'o3',
    ò: 'o4',
    ū: 'u1',
    ú: 'u2',
    ǔ: 'u3',
    ù: 'u4',
    ǖ: 'ü1',
    ǘ: 'ü2',
    ǚ: 'ü3',
    ǜ: 'ü4',
};

/**
 * Locates every tone-marked vowel in `text`.
 *
 * @param {string} text - Raw input.
 * @returns {Object<number, string>} Map from character index to the `toneMap`
 *   code for that character; the code is upper-cased when the source
 *   character was upper case, so the original casing can be restored.
 */
const findAccentedChars = function (text) {
    const accentsFound = {};
    for (let i = 0; i < text.length; i++) {
        const lower = text[i].toLowerCase();
        // Direct own-property lookup instead of scanning every toneMap key.
        if (!Object.prototype.hasOwnProperty.call(toneMap, lower)) continue;
        accentsFound[i] = lower === text[i] ? toneMap[lower] : toneMap[lower].toUpperCase();
    }
    return accentsFound;
};

/**
 * Rebuilds `text` with each tone-marked vowel replaced by its plain vowel.
 * Every replacement is exactly one character, so indices are preserved.
 *
 * @param {Object<number, string>} accentedChars - Result of findAccentedChars.
 * @param {string} text - The same text that produced `accentedChars`.
 * @returns {string} Text without tone marks.
 */
const removeAccents = function (accentedChars, text) {
    let output = '';
    for (let i = 0; i < text.length; i++) {
        // First character of the code is the bare vowel (second is the tone).
        output += i in accentedChars ? accentedChars[i][0] : text[i];
    }
    return output;
};

/**
 * Returns the enumerable keys of `obj` as an array.
 *
 * @param {Object} obj
 * @returns {string[]}
 */
const getKeys = function (obj) {
    return Object.keys(obj);
};

/**
 * Returns the first value of an ascending list that lies in [min, max).
 *
 * The upper bound is exclusive: callers pass the index just past the end of
 * a syllable, and an accent sitting at that index belongs to the next one.
 *
 * @param {number[]} list - Values in ascending order.
 * @param {number} min - Inclusive lower bound.
 * @param {number} max - Exclusive upper bound.
 * @returns {number} The matching value, or -1 when there is none.
 */
const findBetween = function (list, min, max) {
    for (const value of list) {
        if (value >= max) break;
        if (value >= min) return value;
    }
    return -1;
};

/**
 * Lower-cases a string; falsy input (an unmatched regex group) yields undefined.
 *
 * @param {string|undefined} x
 * @returns {string|undefined}
 */
const toLower = function (x) {
    if (x) return x.toLowerCase();
};

//sort the regex options by length so the longer ones have precedence
const lenComp = function (a, b) {
    if (a.length === b.length) return 0;
    return a.length < b.length ? 1 : -1;
};

// Only digits that exist in toneNumberToSymbol count as a tone number. Any
// other digit is left in the text instead of rendering as "undefined".
const toneDigitPattern = '([0-5])?';

const individualRexp = new RegExp(
    '^(' + getKeys(individuals).sort(lenComp).join('|') + ')' + toneDigitPattern,
    'i'
);
const initialFinalRexp = new RegExp(
    '^(' +
        getKeys(initials).sort(lenComp).join('|') +
        ')(' +
        getKeys(finals).sort(lenComp).join('|') +
        ')' +
        toneDigitPattern,
    'i'
);

/** Tone number -> zhuyin tone mark (first tone is unmarked; 0 and 5 are neutral). */
const toneNumberToSymbol = {
    0: '˙',
    1: '',
    2: 'ˊ',
    3: 'ˇ',
    4: 'ˋ',
    5: '˙',
};

/**
 * Converts pinyin text to zhuyin (bopomofo).
 *
 * Tones may be given as a trailing digit 0-5 or as a tone-marked vowel; a
 * syllable with neither is given the neutral tone mark. Characters that are
 * not part of a recognised syllable are copied through unchanged.
 *
 * @param {string} pinyinText - Text to convert.
 * @returns {string} Converted text. Falsy input is returned as-is.
 */
const pinyinToZhuyin = function (pinyinText) {
    if (!pinyinText) return pinyinText;

    const accentedChars = findAccentedChars(pinyinText);
    // Integer-like object keys are enumerated in ascending numeric order, so
    // this list is already sorted.
    const sortedAccentedIndicies = getKeys(accentedChars).map(function (x) {
        return parseInt(x, 10);
    });
    const text = removeAccents(accentedChars, pinyinText);

    // Tone for the syllable occupying [start, start + length): an explicit
    // digit wins, then a tone mark inside the syllable, else neutral (5).
    const resolveTone = function (digit, start, length) {
        if (typeof digit !== 'undefined') return parseInt(digit, 10);
        const accentIdx = findBetween(sortedAccentedIndicies, start, start + length);
        return accentIdx >= 0 ? +accentedChars[accentIdx][1] : 5;
    };

    // Reads one token starting at index i: an initial+final syllable, a
    // stand-alone syllable, or a single pass-through character.
    const parseToken = function (i) {
        const token = { start: i };
        const rest = text.slice(i);

        let parse = rest.match(initialFinalRexp);
        if (parse) {
            parse = parse.map(toLower);
            token.zhuyin = initials[parse[1]] + finals[parse[2]];
            token.type = 'pinyin';
            token.tone = resolveTone(parse[3], i, parse[0].length);
            token.parse = parse;
            return token;
        }

        parse = rest.match(individualRexp);
        if (parse) {
            parse = parse.map(toLower);
            token.zhuyin = individuals[parse[1]];
            token.type = 'pinyin';
            token.tone = resolveTone(parse[2], i, parse[0].length);
            token.parse = parse;
            return token;
        }

        token.type = 'other';
        token.parse = [text[i]];
        return token;
    };

    const tokens = [];
    let i = 0;
    while (i < text.length) {
        const curToken = parseToken(i);
        tokens.push(curToken);
        i += curToken.parse[0].length;
    }

    return tokens
        .map(function (token) {
            if (token.type === 'other') return token.parse.join('');
            return token.zhuyin + toneNumberToSymbol[token.tone];
        })
        .join('')
        .replace(/ㄐㄨ/g, 'ㄐㄩ')
        .replace(/ㄑㄨ/g, 'ㄑㄩ')
        .replace(/ㄒㄨ/g, 'ㄒㄩ') //ju qu xu are actually pronounced as ü
        .replace(/ㄓㄧ/g, 'ㄓ')
        .replace(/ㄔㄧ/g, 'ㄔ')
        .replace(/ㄕㄧ/g, 'ㄕ')
        .replace(/ㄖㄧ/g, 'ㄖ') // zhi chi shi ri
        .replace(/ㄗㄧ/g, 'ㄗ')
        .replace(/ㄘㄧ/g, 'ㄘ')
        .replace(/ㄙㄧ/g, 'ㄙ') // zi ci si
        // NOTE(review): this only removes an apostrophe that directly follows
        // U+200B; a bare apostrophe separator (xi'an) is kept. Confirm whether
        // the character class [\u200b'] was intended.
        .replace(/\u200b'/g, ''); // pinyin syllable separator not necessary
};

if (typeof exports !== "undefined") {
    exports.pinyinToZhuyin = pinyinToZhuyin;
}