UNPKG

kokoro-js

Version:

High-quality text-to-speech for the web

2 lines (1 loc) 11 kB
import {
  StyleTextToSpeech2Model,
  AutoTokenizer,
  Tensor,
  RawAudio,
  env as transformersEnv,
} from "@huggingface/transformers";
import { phonemize as espeakPhonemize } from "phonemizer";
import path from "path";
import fs from "fs/promises";

// ---------------------------------------------------------------------------
// Text normalization and phonemization
// ---------------------------------------------------------------------------

/**
 * Rewrites a matched decimal, 4-digit year (optionally ending in "s") or
 * clock time into a form that is spoken more naturally.
 *
 * @param {string} match Text matched by the number rule in `normalizeText`.
 * @returns {string} The rewritten text; decimals are returned unchanged.
 */
function splitNum(match) {
  // Decimals are handled by a later rule.
  if (match.includes(".")) return match;

  if (match.includes(":")) {
    const [hours, minutes] = match.split(":").map(Number);
    if (minutes === 0) return `${hours} o'clock`;
    if (minutes < 10) return `${hours} oh ${minutes}`;
    return `${hours} ${minutes}`;
  }

  const year = parseInt(match.slice(0, 4), 10);
  // Leave values below 1100 and values such as 2000-2009 untouched.
  if (year < 1100 || year % 1000 < 10) return match;

  const left = match.slice(0, 2);
  const right = parseInt(match.slice(2, 4), 10);
  const suffix = match.endsWith("s") ? "s" : "";
  if (year % 1000 >= 100 && year % 1000 <= 999) {
    if (right === 0) return `${left} hundred${suffix}`;
    if (right < 10) return `${left} oh ${right}${suffix}`;
  }
  return `${left} ${right}${suffix}`;
}

/**
 * Rewrites a "$"/"£" amount so that the currency name follows the number.
 *
 * @param {string} match Text starting with "$" or "£".
 * @returns {string} e.g. "$1.50" -> "1 dollar and 50 cents".
 */
function flipMoney(match) {
  const isDollar = match[0] === "$";
  const unit = isDollar ? "dollar" : "pound";
  const amount = match.slice(1);

  // Amounts with a scale word ("5 million") are not plain numbers.
  if (Number.isNaN(Number(amount))) return `${amount} ${unit}s`;

  if (!match.includes(".")) {
    return `${amount} ${unit}${amount === "1" ? "" : "s"}`;
  }

  const [whole, fraction] = amount.split(".");
  const minor = parseInt(fraction.padEnd(2, "0"), 10);
  let minorUnit;
  if (isDollar) {
    minorUnit = minor === 1 ? "cent" : "cents";
  } else {
    minorUnit = minor === 1 ? "penny" : "pence";
  }
  return `${whole} ${unit}${whole === "1" ? "" : "s"} and ${minor} ${minorUnit}`;
}

/**
 * Spells out a decimal number digit by digit after the point.
 *
 * @param {string} match A decimal such as "3.14".
 * @returns {string} e.g. "3 point 1 4".
 */
function pointNum(match) {
  const [whole, fraction] = match.split(".");
  return `${whole} point ${fraction.split("").join(" ")}`;
}

/**
 * Normalizes raw text before it is phonemized.
 *
 * @param {string} text Input text.
 * @returns {string} Normalized, trimmed text.
 */
function normalizeText(text) {
  return (
    text
      // 1. Quotes and brackets.
      .replace(/[‘’]/g, "'")
      .replace(/«/g, "“")
      .replace(/»/g, "”")
      .replace(/[“”]/g, '"')
      .replace(/\(/g, "«")
      .replace(/\)/g, "»")

      // 2. CJK / full-width punctuation. Written as escapes on purpose: the
      // ASCII forms would be an invalid regex for "?" and would break the
      // thousands-separator and clock-time rules below for "," and ":".
      .replace(/、/g, ", ")
      .replace(/。/g, ". ")
      .replace(/\uFF01/g, "! ")
      .replace(/\uFF0C/g, ", ")
      .replace(/\uFF1A/g, ": ")
      .replace(/\uFF1B/g, "; ")
      .replace(/\uFF1F/g, "? ")

      // 3. Whitespace: collapse every run of spaces, not only the first one.
      .replace(/[^\S \n]/g, " ")
      .replace(/ {2,}/g, " ")
      .replace(/(?<=\n) +(?=\n)/g, "")

      // 4. Abbreviations.
      .replace(/\bD[Rr]\.(?= [A-Z])/g, "Doctor")
      .replace(/\b(?:Mr\.|MR\.(?= [A-Z]))/g, "Mister")
      .replace(/\b(?:Ms\.|MS\.(?= [A-Z]))/g, "Miss")
      .replace(/\b(?:Mrs\.|MRS\.(?= [A-Z]))/g, "Mrs")
      .replace(/\betc\.(?! [A-Z])/gi, "etc")

      // 5. Casual words.
      .replace(/\b(y)eah?\b/gi, "$1e'a")

      // 6. Numbers and currencies.
      .replace(/\d*\.\d+|\b\d{4}s?\b|(?<!:)\b(?:[1-9]|1[0-2]):[0-5]\d\b(?!:)/g, splitNum)
      .replace(/(?<=\d),(?=\d)/g, "")
      .replace(
        /[$£]\d+(?:\.\d+)?(?: hundred| thousand| (?:[bm]|tr)illion)*\b|[$£]\d+\.\d\d?\b/gi,
        flipMoney,
      )
      .replace(/\d*\.\d+/g, pointNum)
      .replace(/(?<=\d)-(?=\d)/g, " to ")
      .replace(/(?<=\d)S/g, " S")

      // 7. Possessives.
      .replace(/(?<=[BCDFGHJ-NP-TV-Z])'?s\b/g, "'S")
      .replace(/(?<=X')S\b/g, "s")

      // 8. Dotted initialisms.
      .replace(/(?:[A-Za-z]\.){2,} [a-z]/g, (m) => m.replace(/\./g, "-"))
      .replace(/(?<=[A-Z])\.(?=[A-Z])/gi, "-")

      .trim()
  );
}

/** Punctuation that is passed through to the output without phonemizing. */
const PUNCTUATION = ';:,.!?¡¿—…"«»“”(){}[]';

/**
 * Escapes characters that are special inside a regular expression.
 *
 * @param {string} value
 * @returns {string}
 */
function escapeRegExp(value) {
  return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}

/** Matches runs of punctuation together with their surrounding whitespace. */
const PUNCTUATION_PATTERN = new RegExp(`(\\s*[${escapeRegExp(PUNCTUATION)}]+\\s*)+`, "g");

/**
 * Splits `text` into alternating matched / unmatched sections.
 *
 * @param {string} text
 * @param {RegExp} pattern Global regular expression.
 * @returns {{match: boolean, text: string}[]} Sections in document order.
 */
function splitByPattern(text, pattern) {
  const sections = [];
  let cursor = 0;
  for (const found of text.matchAll(pattern)) {
    const matched = found[0];
    if (cursor < found.index) {
      sections.push({ match: false, text: text.slice(cursor, found.index) });
    }
    if (matched.length > 0) {
      sections.push({ match: true, text: matched });
    }
    cursor = found.index + matched.length;
  }
  if (cursor < text.length) {
    sections.push({ match: false, text: text.slice(cursor) });
  }
  return sections;
}

/**
 * Converts text to a phoneme string.
 *
 * @param {string} text Input text.
 * @param {string} [language="a"] "a" selects the "en-us" phonemizer voice;
 *   any other value selects "en".
 * @param {boolean} [norm=true] Whether to run `normalizeText` first.
 * @returns {Promise<string>} Trimmed phoneme string.
 */
async function phonemize(text, language = "a", norm = true) {
  if (norm) {
    text = normalizeText(text);
  }

  // Punctuation sections are kept verbatim; everything else is phonemized.
  const sections = splitByPattern(text, PUNCTUATION_PATTERN);
  const lang = language === "a" ? "en-us" : "en";
  const parts = await Promise.all(
    sections.map(async ({ match, text: section }) =>
      match ? section : (await espeakPhonemize(section, lang)).join(" "),
    ),
  );

  let phonemes = parts
    .join("")
    .replace(/kəkˈoːɹoʊ/g, "kˈoʊkəɹoʊ")
    .replace(/kəkˈɔːɹəʊ/g, "kˈəʊkəɹəʊ")
    .replace(/ʲ/g, "j")
    .replace(/r/g, "ɹ")
    .replace(/x/g, "k")
    .replace(/ɬ/g, "l")
    .replace(/(?<=[a-zɹː])(?=hˈʌndɹɪd)/g, " ")
    .replace(/ z(?=[;:,.!?¡¿—…"«»“” ]|$)/g, "z");

  if (language === "a") {
    phonemes = phonemes.replace(/(?<=nˈaɪn)ti(?!ː)/g, "di");
  }
  return phonemes.trim();
}

// ---------------------------------------------------------------------------
// Sentence splitting
// ---------------------------------------------------------------------------

/** Sentence terminators, including the full-width "?" and "!". */
const SENTENCE_TERMINATORS = ".!?…。\uFF1F\uFF01";

/** Closing quotes/brackets that may trail a sentence terminator. */
const TRAILING_CLOSERS = "\"')]}」』";

/**
 * @param {string} char Single character.
 * @param {boolean} [includeNewlines=true] Treat "\n" as a terminator too.
 * @returns {boolean}
 */
function isSentenceTerminator(char, includeNewlines = true) {
  return SENTENCE_TERMINATORS.includes(char) || (includeNewlines && char === "\n");
}

/**
 * Returns the run of non-whitespace characters starting at `start`.
 *
 * @param {string} buffer
 * @param {number} start
 * @returns {string}
 */
function getTokenFromBuffer(buffer, start) {
  let end = start;
  while (end < buffer.length && !/\s/.test(buffer[end])) {
    ++end;
  }
  return buffer.substring(start, end);
}

/** Lower-case abbreviations whose trailing "." does not end a sentence. */
const ABBREVIATIONS = new Set([
  "mr", "mrs", "ms", "dr", "prof", "sr", "jr", "sgt", "col", "gen", "rep",
  "sen", "gov", "lt", "maj", "capt", "st", "mt", "etc", "co", "inc", "ltd",
  "dept", "vs", "p", "pg", "jan", "feb", "mar", "apr", "jun", "jul", "aug",
  "sep", "sept", "oct", "nov", "dec", "sun", "mon", "tu", "tue", "tues",
  "wed", "th", "thu", "thur", "thurs", "fri", "sat",
]);

/**
 * @param {string} token Whitespace-delimited token.
 * @returns {boolean} True when the token (minus a possessive suffix and
 *   trailing dots) is a known abbreviation.
 */
function isAbbreviation(token) {
  const bare = token.replace(/['’]s$/i, "").replace(/\.+$/, "");
  return ABBREVIATIONS.has(bare.toLowerCase());
}

/** Maps each closing bracket to its opening counterpart. */
const MATCHING = new Map([
  [")", "("],
  ["]", "["],
  ["}", "{"],
  ["》", "《"],
  ["〉", "〈"],
  ["›", "‹"],
  ["»", "«"],
  ["」", "「"],
  ["』", "『"],
  ["〕", "〔"],
  ["】", "【"],
]);

const OPENING = new Set(MATCHING.values());

/**
 * Updates the stack of currently open quotes/brackets for one character.
 *
 * @param {string} char Character at index `i`.
 * @param {string[]} stack Open delimiters; mutated in place.
 * @param {number} i Index of `char` within `buffer`.
 * @param {string} buffer Text being scanned.
 * @returns {void}
 */
function updateStack(char, stack, i, buffer) {
  if (char === '"' || char === "'") {
    // An apostrophe between two letters (e.g. "don't") is not a quote.
    const isInnerApostrophe =
      char === "'" &&
      i > 0 &&
      i < buffer.length - 1 &&
      /[A-Za-z]/.test(buffer[i - 1]) &&
      /[A-Za-z]/.test(buffer[i + 1]);
    if (isInnerApostrophe) return;

    if (stack.length && stack.at(-1) === char) {
      stack.pop();
    } else {
      stack.push(char);
    }
    return;
  }

  if (OPENING.has(char)) {
    stack.push(char);
    return;
  }

  const opener = MATCHING.get(char);
  if (opener && stack.length && stack.at(-1) === opener) {
    stack.pop();
  }
}

/**
 * Incrementally splits pushed text into sentences. Completed sentences can be
 * consumed with `for await` (live) or with a synchronous `for...of`, which
 * flushes the buffer first.
 */
class TextSplitterStream {
  constructor() {
    this._buffer = "";
    this._sentences = [];
    this._resolver = null;
    this._closed = false;
  }

  /**
   * Appends text and extracts any sentences that are now complete.
   *
   * @param {...string} texts Chunks, concatenated onto the buffer in order.
   */
  push(...texts) {
    for (const text of texts) {
      this._buffer += text;
      this._process();
    }
  }

  /**
   * Marks the stream as finished and flushes the remaining buffer.
   *
   * @throws {Error} If the stream was already closed.
   */
  close() {
    if (this._closed) {
      throw new Error("Stream is already closed.");
    }
    this._closed = true;
    this.flush();
  }

  /** Emits whatever is left in the buffer as a sentence and wakes the iterator. */
  flush() {
    const remainder = this._buffer.trim();
    if (remainder.length > 0) {
      this._sentences.push(remainder);
    }
    this._buffer = "";
    this._resolve();
  }

  /** Wakes a pending async iterator, if there is one. */
  _resolve() {
    if (this._resolver) {
      this._resolver();
      this._resolver = null;
    }
  }

  /** Scans the buffer and moves every completed sentence to `_sentences`. */
  _process() {
    const buffer = this._buffer;
    const length = buffer.length;
    const stack = [];
    let sentenceStart = 0;
    let i = 0;

    // Extends a boundary at `index` over further terminators, then closing
    // quotes/brackets, and finds the next non-whitespace position.
    const scanBoundary = (index) => {
      let end = index;
      while (end + 1 < length && isSentenceTerminator(buffer[end + 1], false)) {
        ++end;
      }
      while (end + 1 < length && TRAILING_CLOSERS.includes(buffer[end + 1])) {
        ++end;
      }
      let nextNonSpace = end + 1;
      while (nextNonSpace < length && /\s/.test(buffer[nextNonSpace])) {
        ++nextNonSpace;
      }
      return { end, nextNonSpace };
    };

    while (i < length) {
      const char = buffer[i];
      updateStack(char, stack, i, buffer);

      // Only terminators outside any open quote/bracket can end a sentence.
      if (stack.length !== 0 || !isSentenceTerminator(char)) {
        ++i;
        continue;
      }

      // A number alone at the start of a line (e.g. "1.") is a list marker.
      if (/(^|\n)\d+$/.test(buffer.slice(sentenceStart, i))) {
        ++i;
        continue;
      }

      const { end, nextNonSpace } = scanBoundary(i);

      // Terminator directly followed by more text (no whitespace).
      if (i === nextNonSpace - 1 && char !== "\n") {
        ++i;
        continue;
      }

      // Nothing follows yet: wait for more input before deciding.
      if (nextNonSpace === length) break;

      // Locate the whitespace-delimited token containing the terminator.
      let tokenStart = i - 1;
      while (tokenStart >= 0 && /\S/.test(buffer[tokenStart])) {
        tokenStart--;
      }
      tokenStart = Math.max(sentenceStart, tokenStart + 1);
      const token = getTokenFromBuffer(buffer, tokenStart);

      if (!token) {
        ++i;
        continue;
      }

      // URLs and e-mail addresses: skip past the whole token.
      if (
        (/https?[,:]\/\//.test(token) || token.includes("@")) &&
        !isSentenceTerminator(token.at(-1))
      ) {
        i = tokenStart + token.length;
        continue;
      }

      if (isAbbreviation(token)) {
        ++i;
        continue;
      }

      // Dotted initials ("J.R.") followed by a capitalized word.
      if (/^([A-Za-z]\.)+$/.test(token) && nextNonSpace < length && /[A-Z]/.test(buffer[nextNonSpace])) {
        ++i;
        continue;
      }

      // A period followed by a lower-case word.
      if (char === "." && nextNonSpace < length && /[a-z]/.test(buffer[nextNonSpace])) {
        ++i;
        continue;
      }

      const sentence = buffer.substring(sentenceStart, end + 1).trim();

      // A bare ellipsis is not a sentence on its own.
      if (sentence === "..." || sentence === "…") {
        ++i;
        continue;
      }

      if (sentence) {
        this._sentences.push(sentence);
      }
      sentenceStart = end + 1;
      i = sentenceStart;
    }

    this._buffer = buffer.substring(sentenceStart);
    if (this._sentences.length > 0) {
      this._resolve();
    }
  }

  /**
   * Yields sentences as they become available; ends once the stream is
   * closed and drained.
   *
   * @throws {Error} If another async iterator is currently waiting.
   */
  async *[Symbol.asyncIterator]() {
    if (this._resolver) {
      throw new Error("Another iterator is already active.");
    }
    for (;;) {
      if (this._sentences.length > 0) {
        yield this._sentences.shift();
      } else if (this._closed) {
        break;
      } else {
        await new Promise((resolve) => {
          this._resolver = resolve;
        });
      }
    }
  }

  /** Flushes the buffer and iterates over (and removes) all pending sentences. */
  [Symbol.iterator]() {
    this.flush();
    const iterator = this._sentences[Symbol.iterator]();
    this._sentences = [];
    return iterator;
  }

  /** @returns {string[]} Sentences extracted so far and not yet consumed. */
  get sentences() {
    return this._sentences;
  }
}

// ---------------------------------------------------------------------------
// Voices
// ---------------------------------------------------------------------------

/** Metadata for every bundled voice, keyed by voice id. */
const VOICES = Object.freeze({
  af_heart: { name: "Heart", language: "en-us", gender: "Female", traits: "❤️", targetQuality: "A", overallGrade: "A" },
  af_alloy: { name: "Alloy", language: "en-us", gender: "Female", targetQuality: "B", overallGrade: "C" },
  af_aoede: { name: "Aoede", language: "en-us", gender: "Female", targetQuality: "B", overallGrade: "C+" },
  af_bella: { name: "Bella", language: "en-us", gender: "Female", traits: "🔥", targetQuality: "A", overallGrade: "A-" },
  af_jessica: { name: "Jessica", language: "en-us", gender: "Female", targetQuality: "C", overallGrade: "D" },
  af_kore: { name: "Kore", language: "en-us", gender: "Female", targetQuality: "B", overallGrade: "C+" },
  af_nicole: { name: "Nicole", language: "en-us", gender: "Female", traits: "🎧", targetQuality: "B", overallGrade: "B-" },
  af_nova: { name: "Nova", language: "en-us", gender: "Female", targetQuality: "B", overallGrade: "C" },
  af_river: { name: "River", language: "en-us", gender: "Female", targetQuality: "C", overallGrade: "D" },
  af_sarah: { name: "Sarah", language: "en-us", gender: "Female", targetQuality: "B", overallGrade: "C+" },
  af_sky: { name: "Sky", language: "en-us", gender: "Female", targetQuality: "B", overallGrade: "C-" },
  am_adam: { name: "Adam", language: "en-us", gender: "Male", targetQuality: "D", overallGrade: "F+" },
  am_echo: { name: "Echo", language: "en-us", gender: "Male", targetQuality: "C", overallGrade: "D" },
  am_eric: { name: "Eric", language: "en-us", gender: "Male", targetQuality: "C", overallGrade: "D" },
  am_fenrir: { name: "Fenrir", language: "en-us", gender: "Male", targetQuality: "B", overallGrade: "C+" },
  am_liam: { name: "Liam", language: "en-us", gender: "Male", targetQuality: "C", overallGrade: "D" },
  am_michael: { name: "Michael", language: "en-us", gender: "Male", targetQuality: "B", overallGrade: "C+" },
  am_onyx: { name: "Onyx", language: "en-us", gender: "Male", targetQuality: "C", overallGrade: "D" },
  am_puck: { name: "Puck", language: "en-us", gender: "Male", targetQuality: "B", overallGrade: "C+" },
  am_santa: { name: "Santa", language: "en-us", gender: "Male", targetQuality: "C", overallGrade: "D-" },
  bf_emma: { name: "Emma", language: "en-gb", gender: "Female", traits: "🚺", targetQuality: "B", overallGrade: "B-" },
  bf_isabella: { name: "Isabella", language: "en-gb", gender: "Female", targetQuality: "B", overallGrade: "C" },
  bm_george: { name: "George", language: "en-gb", gender: "Male", targetQuality: "B", overallGrade: "C" },
  bm_lewis: { name: "Lewis", language: "en-gb", gender: "Male", targetQuality: "C", overallGrade: "D+" },
  bf_alice: { name: "Alice", language: "en-gb", gender: "Female", traits: "🚺", targetQuality: "C", overallGrade: "D" },
  bf_lily: { name: "Lily", language: "en-gb", gender: "Female", traits: "🚺", targetQuality: "C", overallGrade: "D" },
  bm_daniel: { name: "Daniel", language: "en-gb", gender: "Male", traits: "🚹", targetQuality: "C", overallGrade: "D" },
  bm_fable: { name: "Fable", language: "en-gb", gender: "Male", traits: "🚹", targetQuality: "B", overallGrade: "C" },
});

const VOICE_DATA_URL = "https://huggingface.co/onnx-community/Kokoro-82M-v1.0-ONNX/resolve/main/voices";

/**
 * Loads the raw bytes of a voice file: from disk when `fs.readFile` is
 * available, otherwise over HTTP with a best-effort Cache Storage lookup.
 *
 * @param {string} id Voice id.
 * @returns {Promise<ArrayBuffer>}
 * @throws {Error} If the HTTP request does not succeed.
 */
async function getVoiceFile(id) {
  if (fs && Object.hasOwn(fs, "readFile")) {
    const dirname = typeof __dirname !== "undefined" ? __dirname : import.meta.dirname;
    const file = path.resolve(dirname, `../voices/${id}.bin`);
    const data = await fs.readFile(file);
    // A Buffer may be a view into a larger ArrayBuffer, so copy out exactly
    // the bytes that belong to this file.
    return data.buffer.slice(data.byteOffset, data.byteOffset + data.byteLength);
  }

  const url = `${VOICE_DATA_URL}/${id}.bin`;

  let cache;
  try {
    cache = await caches.open("kokoro-voices");
    const cached = await cache.match(url);
    if (cached) {
      return await cached.arrayBuffer();
    }
  } catch (e) {
    // Caching is optional; fall through to a network fetch.
    console.warn("Unable to open cache", e);
  }

  const response = await fetch(url);
  if (!response.ok) {
    // Never decode (or cache) an error page as voice data.
    throw new Error(`Unable to fetch voice "${id}" from ${url}: ${response.status} ${response.statusText}`);
  }
  const buffer = await response.arrayBuffer();

  if (cache) {
    try {
      await cache.put(url, new Response(buffer, { headers: response.headers }));
    } catch (e) {
      console.warn("Unable to cache file", e);
    }
  }
  return buffer;
}

/** In-memory cache of decoded voice data, keyed by voice id. */
const VOICE_CACHE = new Map();

/**
 * Returns the voice data for `voice` as floats, loading it on first use.
 *
 * @param {string} voice Voice id.
 * @returns {Promise<Float32Array>}
 */
async function getVoiceData(voice) {
  if (VOICE_CACHE.has(voice)) {
    return VOICE_CACHE.get(voice);
  }
  const data = new Float32Array(await getVoiceFile(voice));
  VOICE_CACHE.set(voice, data);
  return data;
}

// ---------------------------------------------------------------------------
// Public API
// ---------------------------------------------------------------------------

/** Number of floats in one style vector. */
const STYLE_DIM = 256;

/** Sample rate passed to `RawAudio` for generated waveforms. */
const SAMPLE_RATE = 24000;

class KokoroTTS {
  /**
   * @param {object} model Callable model; invoked with
   *   `{ input_ids, style, speed }` and expected to resolve to `{ waveform }`.
   * @param {object} tokenizer Callable tokenizer returning `{ input_ids }`.
   */
  constructor(model, tokenizer) {
    this.model = model;
    this.tokenizer = tokenizer;
  }

  /**
   * Loads the model and tokenizer in parallel.
   *
   * @param {string} model_id Identifier passed to both `from_pretrained` calls.
   * @param {object} [options]
   * @param {string} [options.dtype="fp32"]
   * @param {string|null} [options.device=null]
   * @param {Function|null} [options.progress_callback=null]
   * @returns {Promise<KokoroTTS>}
   */
  static async from_pretrained(model_id, { dtype = "fp32", device = null, progress_callback = null } = {}) {
    const modelPromise = StyleTextToSpeech2Model.from_pretrained(model_id, { progress_callback, dtype, device });
    const tokenizerPromise = AutoTokenizer.from_pretrained(model_id, { progress_callback });
    const loaded = await Promise.all([modelPromise, tokenizerPromise]);
    return new KokoroTTS(...loaded);
  }

  /** @returns {object} Frozen voice metadata keyed by voice id. */
  get voices() {
    return VOICES;
  }

  /** Prints the voice table to the console. */
  list_voices() {
    console.table(VOICES);
  }

  /**
   * Checks that `voice` is a known voice id.
   *
   * @param {string} voice
   * @returns {string} First character of the id; callers pass it to
   *   `phonemize` as the language code.
   * @throws {Error} If the voice is unknown.
   */
  _validate_voice(voice) {
    if (!Object.hasOwn(VOICES, voice)) {
      console.error(`Voice "${voice}" not found. Available voices:`);
      console.table(VOICES);
      throw new Error(`Voice "${voice}" not found. Should be one of: ${Object.keys(VOICES).join(", ")}.`);
    }
    return voice.at(0);
  }

  /**
   * Generates audio for `text` in one pass.
   *
   * @param {string} text
   * @param {object} [options]
   * @param {string} [options.voice="af_heart"]
   * @param {number} [options.speed=1]
   * @returns {Promise<RawAudio>}
   */
  async generate(text, { voice = "af_heart", speed = 1 } = {}) {
    const language = this._validate_voice(voice);
    const phonemes = await phonemize(text, language);
    const { input_ids } = this.tokenizer(phonemes, { truncation: true });
    return this.generate_from_ids(input_ids, { voice, speed });
  }

  /**
   * Generates audio from token ids.
   *
   * @param {Tensor} input_ids Token ids; the last dimension is the length.
   * @param {object} [options]
   * @param {string} [options.voice="af_heart"]
   * @param {number} [options.speed=1]
   * @returns {Promise<RawAudio>}
   */
  async generate_from_ids(input_ids, { voice = "af_heart", speed = 1 } = {}) {
    // The style vector is selected by (token count - 2), clamped to [0, 509].
    // NOTE(review): the "- 2" presumably excludes start/end tokens; confirm.
    const styleIndex = Math.min(Math.max(input_ids.dims.at(-1) - 2, 0), 509);
    const offset = STYLE_DIM * styleIndex;
    const voiceData = await getVoiceData(voice);
    const style = voiceData.slice(offset, offset + STYLE_DIM);

    const inputs = {
      input_ids,
      style: new Tensor("float32", style, [1, STYLE_DIM]),
      speed: new Tensor("float32", [speed], [1]),
    };
    const { waveform } = await this.model(inputs);
    return new RawAudio(waveform.data, SAMPLE_RATE);
  }

  /**
   * Generates audio sentence by sentence.
   *
   * @param {string|TextSplitterStream} text A string, or a splitter that the
   *   caller feeds and closes.
   * @param {object} [options]
   * @param {string} [options.voice="af_heart"]
   * @param {number} [options.speed=1]
   * @param {RegExp|string|null} [options.split_pattern=null] When set and
   *   `text` is a string, the string is split on it and the trimmed,
   *   non-empty chunks are pushed to the splitter.
   * @yields {{text: string, phonemes: string, audio: RawAudio}}
   * @throws {Error} If `text` is neither a string nor a TextSplitterStream.
   */
  async *stream(text, { voice = "af_heart", speed = 1, split_pattern = null } = {}) {
    const language = this._validate_voice(voice);

    let splitter;
    if (text instanceof TextSplitterStream) {
      splitter = text;
    } else if (typeof text === "string") {
      splitter = new TextSplitterStream();
      const chunks = split_pattern
        ? text
            .split(split_pattern)
            .map((chunk) => chunk.trim())
            .filter((chunk) => chunk.length > 0)
        : [text];
      splitter.push(...chunks);
      // No more input can arrive for an internally created splitter. Closing
      // it flushes the final sentence and lets the loop below terminate
      // instead of waiting forever.
      splitter.close();
    } else {
      throw new Error("Invalid input type. Expected string or TextSplitterStream.");
    }

    for await (const sentence of splitter) {
      const phonemes = await phonemize(sentence, language);
      const { input_ids } = this.tokenizer(phonemes, { truncation: true });
      const audio = await this.generate_from_ids(input_ids, { voice, speed });
      yield { text: sentence, phonemes, audio };
    }
  }
}

/** Exposes the ONNX WASM path setting of the underlying transformers `env`. */
const env = {
  set wasmPaths(value) {
    transformersEnv.backends.onnx.wasm.wasmPaths = value;
  },
  get wasmPaths() {
    return transformersEnv.backends.onnx.wasm.wasmPaths;
  },
};

export { KokoroTTS, TextSplitterStream, env };