kokoro-js
Version:
High-quality text-to-speech for the web
2 lines (1 loc) • 11 kB
JavaScript
var e=require("@huggingface/transformers"),a=require("phonemizer"),t=require("path"),r=require("fs/promises");function n(e){if(e.includes("."))return e;if(e.includes(":")){let[a,t]=e.split(":").map(Number);return 0===t?`${a} o'clock`:t<10?`${a} oh ${t}`:`${a} ${t}`}let a=parseInt(e.slice(0,4),10);if(a<1100||a%1e3<10)return e;let t=e.slice(0,2),r=parseInt(e.slice(2,4),10),n=e.endsWith("s")?"s":"";if(a%1e3>=100&&a%1e3<=999){if(0===r)return`${t} hundred${n}`;if(r<10)return`${t} oh ${r}${n}`}return`${t} ${r}${n}`}function l(e){const a="$"===e[0]?"dollar":"pound";if(isNaN(Number(e.slice(1))))return`${e.slice(1)} ${a}s`;if(!e.includes(".")){let t="1"===e.slice(1)?"":"s";return`${e.slice(1)} ${a}${t}`}const[t,r]=e.slice(1).split("."),n=parseInt(r.padEnd(2,"0"),10);return`${t} ${a}${"1"===t?"":"s"} and ${n} ${"$"===e[0]?1===n?"cent":"cents":1===n?"penny":"pence"}`}function s(e){let[a,t]=e.split(".");return`${a} point ${t.split("").join(" ")}`}const i=new RegExp(`(\\s*[${o=';:,.!?¡¿—…"«»“”(){}[]',o.replace(/[.*+?^${}()|[\]\\]/g,"\\$&")}]+\\s*)+`,"g");var o;async function c(e,t="a",r=!0){r&&(e=function(e){return e.replace(/[‘’]/g,"'").replace(/«/g,"“").replace(/»/g,"”").replace(/[“”]/g,'"').replace(/\(/g,"«").replace(/\)/g,"»").replace(/、/g,", ").replace(/。/g,". ").replace(/!/g,"! ").replace(/,/g,", ").replace(/:/g,": ").replace(/;/g,"; ").replace(/?/g,"? ").replace(/[^\S \n]/g," ").replace(/ +/," ").replace(/(?<=\n) +(?=\n)/g,"").replace(/\bD[Rr]\.(?= [A-Z])/g,"Doctor").replace(/\b(?:Mr\.|MR\.(?= [A-Z]))/g,"Mister").replace(/\b(?:Ms\.|MS\.(?= [A-Z]))/g,"Miss").replace(/\b(?:Mrs\.|MRS\.(?= [A-Z]))/g,"Mrs").replace(/\betc\.(?! [A-Z])/gi,"etc").replace(/\b(y)eah?\b/gi,"$1e'a").replace(/\d*\.\d+|\b\d{4}s?\b|(?<!:)\b(?:[1-9]|1[0-2]):[0-5]\d\b(?!:)/g,n).replace(/(?<=\d),(?=\d)/g,"").replace(/[$£]\d+(?:\.\d+)?(?: hundred| thousand| (?:[bm]|tr)illion)*\b|[$£]\d+\.\d\d?\b/gi,l).replace(/\d*\.\d+/g,s).replace(/(?<=\d)-(?=\d)/g," to ").replace(/(?<=\d)S/g," S").replace(/(?<=[BCDFGHJ-NP-TV-Z])'?s\b/g,"'S").replace(/(?<=X')S\b/g,"s").replace(/(?:[A-Za-z]\.){2,} [a-z]/g,(e=>e.replace(/\./g,"-"))).replace(/(?<=[A-Z])\.(?=[A-Z])/gi,"-").trim()}(e));const o=function(e,a){const t=[];let r=0;for(const n of e.matchAll(a)){const a=n[0];r<n.index&&t.push({match:!1,text:e.slice(r,n.index)}),a.length>0&&t.push({match:!0,text:a}),r=n.index+a.length}return r<e.length&&t.push({match:!1,text:e.slice(r)}),t}(e,i),c="a"===t?"en-us":"en",g=(await Promise.all(o.map((async({match:e,text:t})=>e?t:(await a.phonemize(t,c)).join(" "))))).join("");let u=g.replace(/kəkˈoːɹoʊ/g,"kˈoʊkəɹoʊ").replace(/kəkˈɔːɹəʊ/g,"kˈəʊkəɹəʊ").replace(/ʲ/g,"j").replace(/r/g,"ɹ").replace(/x/g,"k").replace(/ɬ/g,"l").replace(/(?<=[a-zɹː])(?=hˈʌndɹɪd)/g," ").replace(/ z(?=[;:,.!?¡¿—…"«»“” ]|$)/g,"z");return"a"===t&&(u=u.replace(/(?<=nˈaɪn)ti(?!ː)/g,"di")),u.trim()}function g(e,a=!0){return".!?…。?!".includes(e)||a&&"\n"===e}function u(e,a){let t=a;for(;t<e.length&&!/\s/.test(e[t]);)++t;return e.substring(a,t)}const d=new Set(["mr","mrs","ms","dr","prof","sr","jr","sgt","col","gen","rep","sen","gov","lt","maj","capt","st","mt","etc","co","inc","ltd","dept","vs","p","pg","jan","feb","mar","apr","jun","jul","aug","sep","sept","oct","nov","dec","sun","mon","tu","tue","tues","wed","th","thu","thur","thurs","fri","sat"]);function p(e){return e=e.replace(/['’]s$/i,"").replace(/\.+$/,""),d.has(e.toLowerCase())}const h=new Map([[")","("],["]","["],["}","{"],["》","《"],["〉","〈"],["›","‹"],["»","«"],["〉","〈"],["」","「"],["』","『"],["〕","〔"],["】","【"]]),m=new Set(h.values());function f(e,a,t,r){if('"'===e||"'"===e){if("'"===e&&t>0&&t<r.length-1&&/[A-Za-z]/.test(r[t-1])&&/[A-Za-z]/.test(r[t+1]))return;return void(a.length&&a.at(-1)===e?a.pop():a.push(e))}if(m.has(e))return void a.push(e);const n=h.get(e);n&&a.length&&a.at(-1)===n&&a.pop()}class _{constructor(){this._buffer="",this._sentences=[],this._resolver=null,this._closed=!1}push(...e){for(const a of e)this._buffer+=a,this._process()}close(){if(this._closed)throw new Error("Stream is already closed.");this._closed=!0,this.flush()}flush(){const e=this._buffer.trim();e.length>0&&this._sentences.push(e),this._buffer="",this._resolve()}_resolve(){this._resolver&&(this._resolver(),this._resolver=null)}_process(){let e=0;const a=this._buffer,t=a.length;let r=0,n=[];const l=e=>{let r=e;for(;r+1<t&&g(a[r+1],!1);)++r;for(;r+1<t&&(n=a[r+1],"\"')]}」』".includes(n));)++r;var n;let l=r+1;for(;l<t&&/\s/.test(a[l]);)++l;return{end:r,nextNonSpace:l}};for(;r<t;){const s=a[r];if(f(s,n,r,a),0===n.length&&g(s)){const n=a.slice(e,r);if(/(^|\n)\d+$/.test(n)){++r;continue}const{end:i,nextNonSpace:o}=l(r);if(r===o-1&&"\n"!==s){++r;continue}if(o===t)break;let c=r-1;for(;c>=0&&/\S/.test(a[c]);)c--;c=Math.max(e,c+1);const d=u(a,c);if(!d){++r;continue}if((/https?[,:]\/\//.test(d)||d.includes("@"))&&!g(d.at(-1))){r=c+d.length;continue}if(p(d)){++r;continue}if(/^([A-Za-z]\.)+$/.test(d)&&o<t&&/[A-Z]/.test(a[o])){++r;continue}if("."===s&&o<t&&/[a-z]/.test(a[o])){++r;continue}const h=a.substring(e,i+1).trim();if("..."===h||"…"===h){++r;continue}h&&this._sentences.push(h),r=e=i+1}else++r}this._buffer=a.substring(e),this._sentences.length>0&&this._resolve()}async*[Symbol.asyncIterator](){if(this._resolver)throw new Error("Another iterator is already active.");for(;;)if(this._sentences.length>0)yield this._sentences.shift();else{if(this._closed)break;await new Promise((e=>{this._resolver=e}))}}[Symbol.iterator](){this.flush();const e=this._sentences[Symbol.iterator]();return this._sentences=[],e}get sentences(){return this._sentences}}const v=Object.freeze({af_heart:{name:"Heart",language:"en-us",gender:"Female",traits:"❤️",targetQuality:"A",overallGrade:"A"},af_alloy:{name:"Alloy",language:"en-us",gender:"Female",targetQuality:"B",overallGrade:"C"},af_aoede:{name:"Aoede",language:"en-us",gender:"Female",targetQuality:"B",overallGrade:"C+"},af_bella:{name:"Bella",language:"en-us",gender:"Female",traits:"🔥",targetQuality:"A",overallGrade:"A-"},af_jessica:{name:"Jessica",language:"en-us",gender:"Female",targetQuality:"C",overallGrade:"D"},af_kore:{name:"Kore",language:"en-us",gender:"Female",targetQuality:"B",overallGrade:"C+"},af_nicole:{name:"Nicole",language:"en-us",gender:"Female",traits:"🎧",targetQuality:"B",overallGrade:"B-"},af_nova:{name:"Nova",language:"en-us",gender:"Female",targetQuality:"B",overallGrade:"C"},af_river:{name:"River",language:"en-us",gender:"Female",targetQuality:"C",overallGrade:"D"},af_sarah:{name:"Sarah",language:"en-us",gender:"Female",targetQuality:"B",overallGrade:"C+"},af_sky:{name:"Sky",language:"en-us",gender:"Female",targetQuality:"B",overallGrade:"C-"},am_adam:{name:"Adam",language:"en-us",gender:"Male",targetQuality:"D",overallGrade:"F+"},am_echo:{name:"Echo",language:"en-us",gender:"Male",targetQuality:"C",overallGrade:"D"},am_eric:{name:"Eric",language:"en-us",gender:"Male",targetQuality:"C",overallGrade:"D"},am_fenrir:{name:"Fenrir",language:"en-us",gender:"Male",targetQuality:"B",overallGrade:"C+"},am_liam:{name:"Liam",language:"en-us",gender:"Male",targetQuality:"C",overallGrade:"D"},am_michael:{name:"Michael",language:"en-us",gender:"Male",targetQuality:"B",overallGrade:"C+"},am_onyx:{name:"Onyx",language:"en-us",gender:"Male",targetQuality:"C",overallGrade:"D"},am_puck:{name:"Puck",language:"en-us",gender:"Male",targetQuality:"B",overallGrade:"C+"},am_santa:{name:"Santa",language:"en-us",gender:"Male",targetQuality:"C",overallGrade:"D-"},bf_emma:{name:"Emma",language:"en-gb",gender:"Female",traits:"🚺",targetQuality:"B",overallGrade:"B-"},bf_isabella:{name:"Isabella",language:"en-gb",gender:"Female",targetQuality:"B",overallGrade:"C"},bm_george:{name:"George",language:"en-gb",gender:"Male",targetQuality:"B",overallGrade:"C"},bm_lewis:{name:"Lewis",language:"en-gb",gender:"Male",targetQuality:"C",overallGrade:"D+"},bf_alice:{name:"Alice",language:"en-gb",gender:"Female",traits:"🚺",targetQuality:"C",overallGrade:"D"},bf_lily:{name:"Lily",language:"en-gb",gender:"Female",traits:"🚺",targetQuality:"C",overallGrade:"D"},bm_daniel:{name:"Daniel",language:"en-gb",gender:"Male",traits:"🚹",targetQuality:"C",overallGrade:"D"},bm_fable:{name:"Fable",language:"en-gb",gender:"Male",traits:"🚹",targetQuality:"B",overallGrade:"C"}});const b=new Map;async function y(e){if(b.has(e))return b.get(e);const a=new Float32Array(await async function(e){if(r&&Object.hasOwn(r,"readFile")){const a="undefined"!=typeof __dirname?__dirname:void 0,n=t.resolve(a,`../voices/${e}.bin`),{buffer:l}=await r.readFile(n);return l}const a=`https://huggingface.co/onnx-community/Kokoro-82M-v1.0-ONNX/resolve/main/voices/${e}.bin`;let n;try{n=await caches.open("kokoro-voices");const e=await n.match(a);if(e)return await e.arrayBuffer()}catch(e){console.warn("Unable to open cache",e)}const l=await fetch(a),s=await l.arrayBuffer();if(n)try{await n.put(a,new Response(s,{headers:l.headers}))}catch(e){console.warn("Unable to cache file",e)}return s}(e));return b.set(e,a),a}class w{constructor(e,a){this.model=e,this.tokenizer=a}static async from_pretrained(a,{dtype:t="fp32",device:r=null,progress_callback:n=null}={}){const l=e.StyleTextToSpeech2Model.from_pretrained(a,{progress_callback:n,dtype:t,device:r}),s=e.AutoTokenizer.from_pretrained(a,{progress_callback:n}),i=await Promise.all([l,s]);return new w(...i)}get voices(){return v}list_voices(){console.table(v)}_validate_voice(e){if(!v.hasOwnProperty(e))throw console.error(`Voice "${e}" not found. Available voices:`),console.table(v),new Error(`Voice "${e}" not found. Should be one of: ${Object.keys(v).join(", ")}.`);return e.at(0)}async generate(e,{voice:a="af_heart",speed:t=1}={}){const r=this._validate_voice(a),n=await c(e,r),{input_ids:l}=this.tokenizer(n,{truncation:!0});return this.generate_from_ids(l,{voice:a,speed:t})}async generate_from_ids(a,{voice:t="af_heart",speed:r=1}={}){const n=256*Math.min(Math.max(a.dims.at(-1)-2,0),509),l=(await y(t)).slice(n,n+256),s={input_ids:a,style:new e.Tensor("float32",l,[1,256]),speed:new e.Tensor("float32",[r],[1])},{waveform:i}=await this.model(s);return new e.RawAudio(i.data,24e3)}async*stream(e,{voice:a="af_heart",speed:t=1,split_pattern:r=null}={}){const n=this._validate_voice(a);let l;if(e instanceof _)l=e;else{if("string"!=typeof e)throw new Error("Invalid input type. Expected string or TextSplitterStream.");{l=new _;const a=r?e.split(r).map((e=>e.trim())).filter((e=>e.length>0)):[e];l.push(...a)}}for await(const e of l){const r=await c(e,n),{input_ids:l}=this.tokenizer(r,{truncation:!0}),s=await this.generate_from_ids(l,{voice:a,speed:t});yield{text:e,phonemes:r,audio:s}}}}const $={set wasmPaths(a){e.env.backends.onnx.wasm.wasmPaths=a},get wasmPaths(){return e.env.backends.onnx.wasm.wasmPaths}};exports.KokoroTTS=w,exports.TextSplitterStream=_,exports.env=$;
;