phonemize
Version:
Fast phonemizer with rule-based G2P prediction. Pure JavaScript implementation.
2 lines (1 loc) • 6.17 kB
JavaScript
/**
 * Chinese (zh) grapheme-to-phoneme processor.
 *
 * Uses pinyin-pro to obtain tone-numbered pinyin for each Chinese character,
 * then maps every syllable to IPA through the lookup tables below. Two
 * pinyin-pro dictionaries are registered at load time: `phonemize-zh` (the
 * polyphone table in this file) and `custom` (initially empty, filled by
 * `addPronunciation`).
 */
const pinyinPro = require('pinyin-pro');
const utils = require('./utils-DlSqmN3S.cjs');

// Polyphone disambiguation entries: word -> [tone-marked pinyin, weight].
// The weight is passed through untouched to utils.resolveJson / pinyin-pro;
// presumably a relative frequency — verify against the utils chunk.
const dict = {
  還: ['hái', .0011],
  還有: ['hái yǒu', 85e-5],
  還是: ['hái shì', 42e-5],
  還沒: ['hái méi', 31e-5],
  還好: ['hái hǎo', 28e-5],
  還要: ['hái yào', 25e-5],
  還會: ['hái huì', 22e-5],
  還能: ['hái néng', 19e-5],
  還可以: ['hái kě yǐ', 16e-5],
  還不: ['hái bù', 14e-5],
  還來: ['hái lái', 11e-5],
  還在: ['hái zài', 87e-6],
  還給: ['huán gěi', 62e-6],
  還原: ['huán yuán', 18e-6],
  還錢: ['huán qián', 15e-6],
  還債: ['huán zhài', 12e-6],
  還書: ['huán shū', 89e-7],
  還手: ['huán shǒu', 73e-7],
  還擊: ['huán jī', 61e-7],
  還禮: ['huán lǐ', 48e-7],
  還願: ['huán yuàn', 42e-7],
  還鄉: ['huán xiāng', 37e-7],
  還你: ['huán nǐ', 32e-7],
  還俗: ['huán sú', 25e-7],
  還魂: ['huán hún', 21e-7],
  還報: ['huán bào', 18e-7],
  還清: ['huán qīng', 15e-7],
  還口: ['huán kǒu', 12e-7],
  還嘴: ['huán zuǐ', 98e-8],
  還目: ['huán mù', 55e-8],
  的確: ['dí què', 21e-5],
  目的: ['mù dì', 18e-5],
  的士: ['dí shì', 32e-6],
  著急: ['zháo jí', 19e-5],
  著火: ['zháo huǒ', 42e-6],
  著涼: ['zháo liáng', 28e-6],
  著手: ['zhuó shǒu', 31e-6],
  著眼: ['zhuó yǎn', 23e-6],
  著重: ['zhuó zhòng', 54e-6],
  // NOTE(review): standard dictionaries list 著作 as "zhù zuò" — confirm this entry.
  著作: ['zhuó zuò', 47e-6],
  著名: ['zhù míng', 89e-6],
  了解: ['liǎo jiě', 32e-5],
  了不起: ['liǎo bù qǐ', 68e-6],
  了事: ['liǎo shì', 19e-6],
  行業: ['háng yè', 24e-5],
  行列: ['háng liè', 43e-6],
  銀行: ['yín háng', 31e-5],
  行走: ['xíng zǒu', 37e-6],
  行為: ['xíng wéi', 28e-5],
  行動: ['xíng dòng', 21e-5],
  數字: ['shù zì', 18e-5],
  數量: ['shù liàng', 23e-5],
  數學: ['shù xué', 19e-5],
  數不清: ['shǔ bù qīng', 21e-6],
  數落: ['shǔ luò', 14e-6],
  種類: ['zhǒng lèi', 15e-5],
  種子: ['zhǒng zi', 62e-6],
  各種: ['gè zhǒng', 28e-5],
  種植: ['zhòng zhí', 41e-6],
  種田: ['zhòng tián', 23e-6],
  重要: ['zhòng yào', 52e-5],
  重點: ['zhòng diǎn', 31e-5],
  重量: ['zhòng liàng', 12e-5],
  重複: ['chóng fù', 87e-6],
  重新: ['chóng xīn', 19e-5],
  長度: ['cháng dù', 11e-5],
  長期: ['cháng qī', 24e-5],
  長短: ['cháng duǎn', 32e-6],
  成長: ['chéng zhǎng', 18e-5],
  長大: ['zhǎng dà', 93e-6],
  校長: ['xiào zhǎng', 67e-6],
  調整: ['tiáo zhěng', 16e-5],
  調節: ['tiáo jié', 82e-6],
  調味: ['tiáo wèi', 21e-6],
  調查: ['diào chá', 23e-5],
  調動: ['diào dòng', 45e-6],
  強調: ['qiáng diào', 17e-5],
  處理: ['chǔ lǐ', 34e-5],
  處方: ['chǔ fāng', 28e-6],
  處境: ['chǔ jìng', 42e-6],
  到處: ['dào chù', 13e-5],
  四處: ['sì chù', 51e-6],
  會議: ['huì yì', 21e-5],
  會計: ['kuài jì', 37e-6],
  會面: ['huì miàn', 43e-6],
  假如: ['jiǎ rú', 89e-6],
  假設: ['jiǎ shè', 62e-6],
  假期: ['jià qī', 74e-6],
  假日: ['jià rì', 41e-6],
  空氣: ['kōng qì', 12e-5],
  空間: ['kōng jiān', 18e-5],
  天空: ['tiān kōng', 83e-6],
  空白: ['kòng bái', 34e-6],
  空閒: ['kòng xián', 27e-6],
  快樂: ['kuài lè', 15e-5],
  樂趣: ['lè qù', 68e-6],
  音樂: ['yīn yuè', 19e-5],
  樂器: ['yuè qì', 42e-6],
};

// Namespace-style wrapper ({ default: dict }) in the shape utils.resolveJson is given.
const dict$1 = Object.freeze({ __proto__: null, default: dict });

// Register the bundled dictionary, then an empty `custom` dictionary that
// addPronunciation() later extends.
pinyinPro.addDict(utils.resolveJson(dict$1), 'phonemize-zh');
pinyinPro.addDict({}, 'custom');

// Pinyin -> IPA lookup. Holds initials, finals, and a number of whole
// syllables; pinyinToIPA() tries the whole syllable first and only then
// falls back to initial + final. Prototype-less so that a syllable such as
// "constructor" can never resolve to an inherited Object member.
const PINYIN_TO_IPA = Object.freeze({
  __proto__: null,
  // Initials
  b: 'p', p: 'pʰ', d: 't', t: 'tʰ', g: 'k', k: 'kʰ',
  j: 'tɕ', q: 'tɕʰ', zh: 'ʈʂ', ch: 'ʈʂʰ', z: 'ts', c: 'tsʰ',
  f: 'f', x: 'ɕ', sh: 'ʂ', r: 'ʐ', s: 's', h: 'x',
  m: 'm', n: 'n', l: 'l', w: 'w', y: 'j',
  // Simple finals (`v` is the ASCII stand-in for `ü`)
  a: 'a', o: 'o', e: 'ə', i: 'i', u: 'u', ü: 'y', v: 'y',
  // Compound and nasal finals
  ai: 'aɪ', ei: 'eɪ', ao: 'ɑʊ', ou: 'oʊ',
  an: 'an', en: 'ən', ang: 'ɑŋ', eng: 'əŋ', ong: 'ʊŋ', er: 'ɚ',
  // i- finals
  ia: 'ia', ie: 'iɛ', iao: 'iɑʊ', iu: 'ioʊ', iou: 'ioʊ',
  ian: 'iɛn', in: 'in', iang: 'iɑŋ', ing: 'iŋ', iong: 'iʊŋ',
  // u- finals
  ua: 'ua', uo: 'uɔ', uai: 'uaɪ', ui: 'ueɪ', uei: 'ueɪ',
  uan: 'uan', un: 'uən', uen: 'uən', uang: 'uɑŋ', ueng: 'uəŋ',
  // ü- finals
  üe: 'yɛ', ve: 'yɛ', üan: 'yɛn', van: 'yɛn', ün: 'yn', vn: 'yn',
  // Whole syllables whose vowel is not the plain `i`
  zhi: 'ʈʂɨ', chi: 'ʈʂʰɨ', shi: 'ʂɨ', ri: 'ʐɨ', zi: 'tsɨ', ci: 'tsʰɨ', si: 'sɨ',
  // Other whole-syllable overrides
  zhong: 'ʈʂʊŋ', wen: 'wən', hao: 'xɑʊ', de: 'tə', de0: 'tə', wo: 'wɔ',
  ta: 'tʰa', zhe: 'ʈʂə', ge: 'kə', le: 'lə', yi: 'i', san: 'san',
  wu: 'wu', liu: 'lioʊ', qi: 'tɕi', ba: 'pa', jiu: 'tɕioʊ',
});

// Tone number -> IPA tone letters. 5 is the neutral tone; 0 (no mark) is the
// tone value attached to non-Chinese characters.
const TONE_MARKS = Object.freeze({
  __proto__: null,
  1: '˥˥',
  2: '˧˥',
  3: '˧˩˧',
  4: '˥˩',
  5: '˧',
  0: '',
});

// Two-letter initials; checked before the single-letter ones so that "zh"
// is not split as "z" + "h…".
const INITIAL_PATTERNS = ['zh', 'ch', 'sh'];
const SINGLE_INITIALS = [
  'b', 'p', 'm', 'f', 'd', 't', 'n', 'l', 'g', 'k',
  'h', 'j', 'q', 'x', 'r', 'z', 'c', 's', 'y', 'w',
];

// Inclusive code-point ranges that isChinese() accepts.
const CHINESE_CODE_POINT_RANGES = [
  [0x4e00, 0x9fff],
  [0x3400, 0x4dbf],
  [0x20000, 0x2a6df],
  [0x2a700, 0x2b73f],
  [0x2b740, 0x2b81f],
  [0x2b820, 0x2ceaf],
  [0x2ceb0, 0x2ebef],
];

const G2PModel = (function () {
  /**
   * Creates the processor and sets its descriptive fields
   * (`id`, `name`, `supportedLanguages`).
   */
  function G2PModel() {
    this.id = 'zh-g2p';
    this.name = 'Chinese G2P Processor';
    this.supportedLanguages = ['zh'];
  }

  /**
   * Converts text to IPA. The second and third positional arguments are
   * accepted and ignored; only `text` is used.
   *
   * @param {string} text - Input text.
   * @returns {string} Space-separated IPA, one item per character.
   */
  G2PModel.prototype.predict = function (text, _unused1, _unused2) {
    return this.textToIPA(text);
  };

  /**
   * Converts text to IPA.
   *
   * @param {string} text - Input text.
   * @returns {string} The `ipa` field of every per-character result joined
   *   with single spaces, or '' for empty / whitespace-only input.
   */
  G2PModel.prototype.textToIPA = function (text) {
    if (!text?.trim()) {
      return '';
    }
    return this.textToPinyinResults(text)
      .map((entry) => entry.ipa)
      .join(' ');
  };

  /**
   * Converts text to Zhuyin. Chinese characters go through
   * utils.pinyinToZhuyin; every other character is emitted unchanged.
   *
   * @param {string} text - Input text.
   * @returns {string} Space-separated items, or '' for empty /
   *   whitespace-only input.
   */
  G2PModel.prototype.textToZhuyin = function (text) {
    if (!text?.trim()) {
      return '';
    }
    return this.textToPinyinResults(text)
      .map((entry) =>
        this.isChinese(entry.word) ? utils.pinyinToZhuyin(entry.pinyin) : entry.word,
      )
      .join(' ');
  };

  /**
   * Produces one result per character (code point) of `text`.
   *
   * @param {string} text - Input text.
   * @returns {Array<{pinyin: string, tone: number, ipa: string, word: string}>}
   *   For Chinese characters: the tone-numbered pinyin, parsed tone and IPA.
   *   For anything else: the character itself with tone 0. On any exception
   *   a warning is logged and every character is returned as a pass-through.
   */
  G2PModel.prototype.textToPinyinResults = function (text) {
    if (!text?.trim()) {
      return [];
    }
    const passThrough = (char) => ({ pinyin: char, tone: 0, ipa: char, word: char });
    const results = [];
    try {
      // `nonZh: 'removed'` means this array holds syllables for the Chinese
      // characters only, so it must be walked with its own cursor rather
      // than with the character's position in `text`.
      // Assumes pinyin-pro yields exactly one syllable for every character
      // isChinese() accepts — TODO confirm for rare / extension characters.
      const syllables = pinyinPro.pinyin(text, {
        toneType: 'num',
        type: 'array',
        v: true,
        nonZh: 'removed',
      });
      let syllableIndex = 0;
      // Iterate by code point so surrogate pairs stay intact.
      for (const char of text) {
        if (!this.isChinese(char)) {
          results.push(passThrough(char));
          continue;
        }
        // Fall back to the character itself when no syllable is available.
        const pinyin = syllables[syllableIndex] || char;
        syllableIndex += 1;
        const { syllable, tone } = this.parsePinyinWithTone(pinyin);
        results.push({
          pinyin,
          tone,
          ipa: this.pinyinToIPA(syllable, tone),
          word: char,
        });
      }
    } catch (error) {
      // Deliberate best-effort: report and degrade to pass-through output.
      console.warn('Chinese G2P conversion failed:', error);
      return Array.from(text).map(passThrough);
    }
    return results;
  };

  /**
   * Maps a toneless pinyin syllable plus tone number to IPA.
   *
   * @param {string} syllable - Pinyin without the tone digit.
   * @param {number} tone - Key into TONE_MARKS; unknown tones add no mark.
   * @returns {string} IPA segment(s) followed by the tone letters.
   */
  G2PModel.prototype.pinyinToIPA = function (syllable, tone) {
    const toneMark = TONE_MARKS[tone] ?? '';
    const wholeSyllable = PINYIN_TO_IPA[syllable];
    if (wholeSyllable) {
      return wholeSyllable + toneMark;
    }
    const { initial, final } = this.decomposePinyin(syllable);
    const initialIPA = PINYIN_TO_IPA[initial] || '';
    // An unmapped final is emitted verbatim rather than dropped.
    const finalIPA = PINYIN_TO_IPA[final] || final;
    return initialIPA + finalIPA + toneMark;
  };

  /**
   * Splits a syllable into initial and final.
   *
   * @param {string} syllable - Toneless pinyin syllable.
   * @returns {{initial: string, final: string}} `initial` is '' when the
   *   syllable starts with none of the known initials.
   */
  G2PModel.prototype.decomposePinyin = function (syllable) {
    const initial =
      INITIAL_PATTERNS.find((candidate) => syllable.startsWith(candidate)) ??
      SINGLE_INITIALS.find((candidate) => syllable.startsWith(candidate)) ??
      '';
    return { initial, final: syllable.slice(initial.length) };
  };

  /**
   * Separates a trailing tone digit from a pinyin syllable.
   *
   * @param {string} pinyin - Syllable, optionally ending in a digit 0-5.
   * @returns {{syllable: string, tone: number}} Tone is 1-5; a missing digit
   *   and the digit 0 both yield 5 (neutral), so "de0" and "de" agree.
   */
  G2PModel.prototype.parsePinyinWithTone = function (pinyin) {
    const match = pinyin.match(/^(.+?)([0-5]?)$/);
    if (!match) {
      return { syllable: pinyin, tone: 5 };
    }
    const [, syllable, digit] = match;
    const parsed = digit ? Number.parseInt(digit, 10) : 5;
    return { syllable, tone: parsed === 0 ? 5 : parsed };
  };

  /**
   * Tests whether the first code point of `char` lies in one of
   * CHINESE_CODE_POINT_RANGES.
   *
   * @param {string} char - String whose first code point is tested.
   * @returns {boolean} False for the empty string.
   */
  G2PModel.prototype.isChinese = function (char) {
    // codePointAt (not charCodeAt) so the ranges above 0xFFFF are reachable.
    const codePoint = char.codePointAt(0);
    return CHINESE_CODE_POINT_RANGES.some(
      ([low, high]) => codePoint >= low && codePoint <= high,
    );
  };

  /**
   * Adds one entry to the `custom` pinyin-pro dictionary.
   *
   * @param {string} word - Key to register; surrounding whitespace is trimmed.
   * @param {*} pronunciation - Value stored for the key, forwarded unchanged
   *   to pinyinPro.addDict.
   */
  G2PModel.prototype.addPronunciation = function (word, pronunciation) {
    pinyinPro.addDict(
      { [word.trim()]: pronunciation },
      { name: 'custom', dict1: 'add' },
    );
  };

  return G2PModel;
})();

module.exports = G2PModel;