UNPKG

phonemize

Version:

Fast phonemizer with rule-based G2P prediction. Pure JavaScript implementation.

2 lines (1 loc) 4.57 kB
/** ASCII punctuation characters, exported as a single string. */
const PUNCTUATION = `!"#$%&'()*+,-./:;<=>?@[\\]^_\`{|}~`;

/**
 * ARPABET symbol -> IPA string.
 * Contains the standard ARPABET inventory plus a number of non-standard
 * extension symbols (TS, UO, YE, ..., NY).
 */
const ARPABET_TO_IPA = {
  // Monophthongs
  AA: `ɑ`, AE: `æ`, AH: `ʌ`, AO: `ɔ`, AX: `ə`, AXR: `ɚ`, EH: `ɛ`, ER: `ɝ`,
  IH: `ɪ`, IY: `i`, UH: `ʊ`, UW: `u`,
  // Diphthongs
  AW: `aʊ`, AY: `aɪ`, EY: `eɪ`, OW: `oʊ`, OY: `ɔɪ`,
  // Stops
  B: `b`, D: `d`, G: `ɡ`, K: `k`, P: `p`, T: `t`,
  // Fricatives
  DH: `ð`, F: `f`, HH: `h`, S: `s`, SH: `ʃ`, TH: `θ`, V: `v`, Z: `z`, ZH: `ʒ`,
  // Affricates
  CH: `tʃ`, JH: `dʒ`,
  // Nasals, liquids, glides
  M: `m`, N: `n`, NG: `ŋ`, EL: `ɫ`, L: `l`, R: `ɹ`, W: `w`, Y: `j`,
  // Extension symbols
  TS: `ts`, UO: `uo`, YE: `je`, UAH: `ua`, YEN: `jɛn`, YAW: `jaʊ`, YOW: `joʊ`,
  WAY: `waɪ`, UAHN: `uɑn`, UE: `yɛ`, UY: `ui`, UA: `ua`, EN: `ən`, IN: `in`,
  UN: `un`, OE: `ø`, AR: `ɑr`, HL: `ħl`, AB: `ɑb`, SAW: `ɔ`, KH: `kʰ`,
  PH: `pʰ`, NY: `ɲ`,
};

/** ARPABET stress digit -> IPA stress mark (0 = no mark). */
const IPA_STRESS_MAP = { 0: ``, 1: `ˈ`, 2: `ˌ` };

/** Chinese tone-letter sequence -> arrow notation. */
const CHINESE_TONE_TO_ARROW = {
  "˥˥": `→`,
  "˥": `→`,
  "˧˥": `↗`,
  "˧˩˧": `↓↗`,
  "˧˩": `↓`,
  "˥˩": `↘`,
  "˧": `→`,
  "˩": `↓`,
  "˩˥": `↗`,
  "˥˧": `↘`,
  "˧˧": `→`,
};

/**
 * Swaps the keys and values of a lookup table.
 * When several keys share a value, the key that appears last wins.
 */
function invertTable(table) {
  return Object.fromEntries(
    Object.entries(table).map(([key, value]) => [value, key]),
  );
}

/**
 * IPA string -> ARPABET symbol.
 * Because later duplicates win, `ɔ` resolves to SAW (not AO) and `ua` to UA
 * (not UAH).
 * NOTE(review): confirm whether the extension symbols are meant to shadow the
 * standard ones here.
 */
const IPA_TO_ARPABET = invertTable(ARPABET_TO_IPA);

/** IPA stress mark -> ARPABET stress digit (as a string). */
const IPA_TO_STRESS = invertTable(IPA_STRESS_MAP);

/** Pinyin initial (consonant onset) -> Zhuyin symbol. */
const PINYIN_INITIALS_TO_ZHUYIN = {
  b: `ㄅ`, p: `ㄆ`, m: `ㄇ`, f: `ㄈ`,
  d: `ㄉ`, t: `ㄊ`, n: `ㄋ`, l: `ㄌ`,
  g: `ㄍ`, k: `ㄎ`, h: `ㄏ`,
  j: `ㄐ`, q: `ㄑ`, x: `ㄒ`,
  zh: `ㄓ`, ch: `ㄔ`, sh: `ㄕ`, r: `ㄖ`,
  z: `ㄗ`, c: `ㄘ`, s: `ㄙ`,
  y: `ㄧ`, w: `ㄨ`,
};

/**
 * Pinyin final, or whole syllable that has a dedicated Zhuyin spelling,
 * -> Zhuyin symbols. The `ü...` and `v...` spellings are aliases.
 */
const PINYIN_FINALS_TO_ZHUYIN = {
  // Simple finals
  a: `ㄚ`, o: `ㄛ`, e: `ㄜ`, i: `ㄧ`, u: `ㄨ`, ü: `ㄩ`, v: `ㄩ`,
  // Compound finals
  ai: `ㄞ`, ei: `ㄟ`, ao: `ㄠ`, ou: `ㄡ`,
  an: `ㄢ`, en: `ㄣ`, ang: `ㄤ`, eng: `ㄥ`, ong: `ㄨㄥ`, er: `ㄦ`,
  // i- finals
  ia: `ㄧㄚ`, ie: `ㄧㄝ`, iao: `ㄧㄠ`, iu: `ㄧㄡ`, iou: `ㄧㄡ`, ian: `ㄧㄢ`,
  in: `ㄧㄣ`, iang: `ㄧㄤ`, ing: `ㄧㄥ`, iong: `ㄧㄨㄥ`,
  // u- finals
  ua: `ㄨㄚ`, uo: `ㄨㄛ`, uai: `ㄨㄞ`, ui: `ㄨㄟ`, uei: `ㄨㄟ`, uan: `ㄨㄢ`,
  un: `ㄨㄣ`, uen: `ㄨㄣ`, uang: `ㄨㄤ`, ueng: `ㄨㄥ`,
  // ü- finals
  üe: `ㄩㄝ`, ve: `ㄩㄝ`, üan: `ㄩㄢ`, van: `ㄩㄢ`, ün: `ㄩㄣ`, vn: `ㄩㄣ`,
  // Whole syllables spelled with a single Zhuyin symbol
  zhi: `ㄓ`, chi: `ㄔ`, shi: `ㄕ`, ri: `ㄖ`, zi: `ㄗ`, ci: `ㄘ`, si: `ㄙ`,
  // y- syllables
  yin: `ㄧㄣ`, yang: `ㄧㄤ`, yao: `ㄧㄠ`, ye: `ㄧㄝ`, yi: `ㄧ`, yo: `ㄧㄛ`,
  you: `ㄧㄡ`, yu: `ㄩ`, yue: `ㄩㄝ`, yun: `ㄩㄣ`, yuan: `ㄩㄢ`,
  // w- syllables
  wa: `ㄨㄚ`, wai: `ㄨㄞ`, wan: `ㄨㄢ`, wang: `ㄨㄤ`, wei: `ㄨㄟ`, wen: `ㄨㄣ`,
  weng: `ㄨㄥ`, wo: `ㄨㄛ`, wu: `ㄨ`,
};

/**
 * Pinyin initials in the order they are tried against a syllable: the
 * two-letter initials come first so that `zh`/`ch`/`sh` are not split into
 * `z`/`c`/`s` + `h...`.
 */
const PINYIN_INITIALS_BY_PRIORITY = [
  `zh`, `ch`, `sh`,
  `b`, `p`, `m`, `f`, `d`, `t`, `n`, `l`, `g`, `k`, `h`,
  `j`, `q`, `x`, `r`, `z`, `c`, `s`, `y`, `w`,
];

/**
 * Tone sequences ordered longest-first so that multi-letter contours are
 * replaced before the single tone letters they contain. Computed once;
 * Array.prototype.sort is stable, so equal-length keys keep table order.
 */
const TONE_SEQUENCES_LONGEST_FIRST = Object.keys(CHINESE_TONE_TO_ARROW).sort(
  (a, b) => b.length - a.length,
);

/**
 * Reads `key` from `table` only when it is an own property, so that inherited
 * names such as `constructor` or `toString` are never mistaken for entries.
 *
 * @param {Object<string, string>} table
 * @param {string} key
 * @returns {string|undefined}
 */
function lookup(table, key) {
  return Object.prototype.hasOwnProperty.call(table, key)
    ? table[key]
    : undefined;
}

/**
 * Converts an IPA string into space-separated ARPABET symbols.
 *
 * A stress mark (`ˈ` / `ˌ`) is turned into a `1` / `2` suffix on the phoneme
 * that immediately follows it; if no known phoneme follows, the mark is
 * dropped. Only one- and two-character IPA sequences are recognised, so the
 * three-character entries of the reverse table are never matched here.
 *
 * @param {string} ipa
 * @returns {string} ARPABET symbols separated by single spaces, or `` for
 *   empty, blank, or non-string input.
 */
function ipaToArpabet(ipa) {
  if (!ipa || typeof ipa !== `string` || !ipa.trim()) return ``;

  const tokens = [];
  let pos = 0;
  while (pos < ipa.length) {
    const char = ipa[pos];

    const stress = lookup(IPA_TO_STRESS, char);
    if (stress) {
      pos++;
      const stressed = getNextPhoneme(ipa, pos);
      if (stressed) {
        tokens.push(stressed.arpabet + stress);
        pos += stressed.length;
      }
      continue;
    }

    const phoneme = getNextPhoneme(ipa, pos);
    if (phoneme) {
      tokens.push(phoneme.arpabet);
      pos += phoneme.length;
      continue;
    }

    if (char === ` `) {
      // Word boundary marker; collapsed to a single space by the final replace.
      if (tokens.length > 0 && tokens[tokens.length - 1] !== ` `) {
        tokens.push(` `);
      }
    } else if (char.trim()) {
      // NOTE(review): unknown characters are emitted as the literal token
      // "undefined"; kept as-is because callers may rely on it — confirm.
      tokens.push(`undefined`);
    }
    pos++;
  }

  return tokens.join(` `).replace(/\s+/g, ` `).trim();
}

/**
 * Converts space-separated ARPABET symbols (optionally suffixed with a stress
 * digit 0/1/2) into a single IPA string.
 *
 * Tokens that are not known ARPABET symbols are copied through unchanged.
 * At most one stress mark is emitted and it is placed at the very start of
 * the result: primary if any known symbol carried `1`, otherwise secondary if
 * any carried `2`.
 *
 * @param {string} arpabet
 * @returns {string} IPA string, or `` for empty, blank, or non-string input.
 */
function arpabetToIpa(arpabet) {
  if (!arpabet || typeof arpabet !== `string` || !arpabet.trim()) return ``;

  const parts = [];
  let hasPrimary = false;
  let hasSecondary = false;

  for (const token of arpabet.split(/\s+/)) {
    if (!token.trim()) continue;

    const stressDigit = token.match(/([012])$/)?.[0] || ``;
    const symbol = token.replace(/[012]$/, ``);
    const ipa = lookup(ARPABET_TO_IPA, symbol);

    if (!ipa) {
      parts.push(token);
      continue;
    }
    parts.push(ipa);
    if (stressDigit === `1`) hasPrimary = true;
    else if (stressDigit === `2`) hasSecondary = true;
  }

  const word = parts.join(``);
  if (hasPrimary) return IPA_STRESS_MAP[1] + word;
  if (hasSecondary) return IPA_STRESS_MAP[2] + word;
  return word;
}

/**
 * Finds the ARPABET symbol for the IPA phoneme starting at `start`, preferring
 * a two-character match over a one-character match.
 *
 * @param {string} ipa
 * @param {number} start index into `ipa`
 * @returns {{arpabet: string, length: number}|null} the symbol and the number
 *   of characters to advance, or null when nothing matches.
 */
function getNextPhoneme(ipa, start) {
  const pair = lookup(IPA_TO_ARPABET, ipa.substring(start, start + 2));
  if (pair) return { arpabet: pair, length: 2 };

  const single = lookup(IPA_TO_ARPABET, ipa[start]);
  return single ? { arpabet: single, length: 1 } : null;
}

/**
 * Replaces every Chinese tone-letter sequence in `text` with its arrow
 * notation, longest sequences first.
 *
 * @param {string} text
 * @returns {string} the converted text; falsy or non-string input is returned
 *   unchanged.
 */
function convertChineseTonesToArrows(text) {
  if (!text || typeof text !== `string`) return text;

  // Literal split/join instead of a dynamically built RegExp: the keys are
  // plain text, so no pattern escaping is involved.
  return TONE_SEQUENCES_LONGEST_FIRST.reduce(
    (converted, tone) => converted.split(tone).join(CHINESE_TONE_TO_ARROW[tone]),
    text,
  );
}

/**
 * Converts one pinyin syllable with an optional trailing tone digit (1-5)
 * into Zhuyin followed by the tone digit.
 *
 * The whole syllable is looked up first; on a hit the tone digit is appended
 * only if one was supplied. Otherwise the syllable is split into initial and
 * final, and a missing tone digit defaults to `5`. If the final is unknown
 * (or absent), the toneless pinyin is emitted in place of Zhuyin and, for an
 * unknown final, a warning is logged.
 * NOTE(review): the whole-syllable path does not apply the default tone `5`
 * while the decomposed path does — confirm this asymmetry is intended.
 *
 * @param {string} pinyin
 * @returns {string} Zhuyin + tone digit; blank or non-string input is
 *   returned unchanged.
 */
function pinyinToZhuyin(pinyin) {
  if (typeof pinyin !== `string` || !pinyin.trim()) return pinyin;

  const toneMatch = pinyin.match(/([1-5])$/);
  const tone = toneMatch ? toneMatch[1] : ``;
  const syllable = pinyin.replace(/[1-5]$/, ``);

  const whole = lookup(PINYIN_FINALS_TO_ZHUYIN, syllable);
  if (whole) return whole + tone;

  const parts = decomposePinyinSyllable(syllable);
  const initialZhuyin = parts.initial
    ? lookup(PINYIN_INITIALS_TO_ZHUYIN, parts.initial)
    : undefined;
  const finalZhuyin = parts.final
    ? lookup(PINYIN_FINALS_TO_ZHUYIN, parts.final)
    : undefined;

  let zhuyin = initialZhuyin || ``;
  if (finalZhuyin) {
    zhuyin += finalZhuyin;
  } else if (parts.final) {
    zhuyin = syllable;
    console.warn(`Could not find a Zhuyin mapping for pinyin final: ${parts.final}`);
  } else if (parts.initial) {
    // Bare initial with no final: fall back to the raw pinyin.
    zhuyin = syllable;
  }

  return zhuyin + (tone || `5`);
}

/**
 * Splits a toneless pinyin syllable into its initial and final.
 *
 * @param {string} syllable
 * @returns {{initial: string, final: string}} `initial` is `` when the
 *   syllable starts with no known initial; both are `` for blank input.
 */
function decomposePinyinSyllable(syllable) {
  if (typeof syllable !== `string` || !syllable.trim()) {
    return { initial: ``, final: `` };
  }

  const initial = PINYIN_INITIALS_BY_PRIORITY.find((candidate) =>
    syllable.startsWith(candidate),
  );
  return initial
    ? { initial, final: syllable.slice(initial.length) }
    : { initial: ``, final: syllable };
}

/**
 * Unwraps a module-namespace-style object: returns `mod.default` when its
 * `typeof` is `object` (which includes `null`), otherwise `mod` itself.
 *
 * @param {*} mod
 * @returns {*}
 */
function resolveJson(mod) {
  return typeof mod.default === `object` ? mod.default : mod;
}

export {
  PUNCTUATION as P,
  arpabetToIpa as a,
  convertChineseTonesToArrows as c,
  ipaToArpabet as i,
  pinyinToZhuyin as p,
  resolveJson as r,
};