UNPKG

@chengsokdara/use-whisper

Version:

React Hook for OpenAI Whisper API with speech recorder and silence removal built-in.

8 lines (5 loc) 4.84 kB
import {
  b as ffmpegCorePath,
  c as ffmpegAudioFilter,
  d as apiBaseUrl,
} from './chunk-VO7VPLVP.js';
import { useEffectAsync, useMemoAsync } from '@chengsokdara/react-hooks-async';
import { useRef, useState, useEffect } from 'react';

/** Option defaults; caller-supplied options are spread over these. */
const DEFAULT_CONFIG = {
  apiKey: '',
  autoStart: false,
  autoTranscribe: true,
  mode: 'transcriptions',
  nonStop: false,
  removeSilence: false,
  stopTimeout: 5000,
  streaming: false,
  timeSlice: 1000,
  onDataAvailable: undefined,
  onTranscribe: undefined,
};

/** Shape of the per-instance timer table (timer name -> timeout id). */
const DEFAULT_TIMEOUTS = { stop: undefined };

/** Initial value of the `transcript` state. */
const DEFAULT_TRANSCRIPT = { blob: undefined, text: undefined };

/**
 * React hook that records microphone audio and turns it into a transcript.
 *
 * @param {object} [config] - Options, merged over `DEFAULT_CONFIG`.
 * @param {string} [config.apiKey] - Sent as `Authorization: Bearer <apiKey>`
 *   on the transcription request. Required unless `onTranscribe` is given.
 * @param {boolean} [config.autoStart] - Start recording from an effect keyed
 *   on this flag.
 * @param {boolean} [config.autoTranscribe] - Transcribe when recording stops;
 *   when false only the recorded blob is stored in `transcript`.
 * @param {string} [config.mode] - Appended to the API base URL; the
 *   `language` form field is only sent when this is `"transcriptions"`.
 * @param {boolean} [config.nonStop] - Arm a timer that stops the recording
 *   `stopTimeout` ms after recording starts / speech stops; the timer is
 *   cancelled whenever speech is detected.
 * @param {boolean} [config.removeSilence] - Run the recording through ffmpeg
 *   with the imported audio filter before transcribing.
 * @param {number} [config.stopTimeout] - Delay in ms for the `nonStop` timer.
 * @param {boolean} [config.streaming] - Record in `timeSlice` ms slices and
 *   (with `autoTranscribe`) transcribe the accumulated audio per slice.
 * @param {number} [config.timeSlice] - Slice length in ms when streaming.
 * @param {object} [config.whisperConfig] - Optional `language`, `prompt`,
 *   `response_format` and `temperature` request fields.
 * @param {Function} [config.onDataAvailable] - Called with each streamed slice.
 * @param {Function} [config.onTranscribe] - Custom transcriber; receives the
 *   encoded blob and its resolved value is stored as `transcript`.
 * @returns {{recording: boolean, speaking: boolean, transcribing: boolean,
 *   transcript: {blob?: Blob, text?: string}, pauseRecording: Function,
 *   startRecording: Function, stopRecording: Function}}
 * @throws {Error} When neither `apiKey` nor `onTranscribe` is provided.
 */
const useWhisper = (config) => {
  const {
    apiKey,
    autoStart,
    autoTranscribe,
    mode,
    nonStop,
    removeSilence,
    stopTimeout,
    streaming,
    timeSlice,
    whisperConfig,
    onDataAvailable,
    onTranscribe,
  } = { ...DEFAULT_CONFIG, ...config };

  if (!apiKey && !onTranscribe) {
    throw new Error('apiKey is required if onTranscribe is not provided');
  }

  const chunks = useRef([]); // encoded slices collected while streaming
  const encoder = useRef(); // lamejs Mp3Encoder
  const listener = useRef(); // hark speech detector
  const recorder = useRef(); // RecordRTCPromisesHandler
  const stream = useRef(); // MediaStream from getUserMedia
  // Copy the defaults: the timer helpers mutate this object, so handing every
  // hook instance the same module-level object would make them share timers.
  const timeouts = useRef({ ...DEFAULT_TIMEOUTS });

  const [recording, setRecording] = useState(false);
  const [speaking, setSpeaking] = useState(false);
  const [transcribing, setTranscribing] = useState(false);
  const [transcript, setTranscript] = useState(DEFAULT_TRANSCRIPT);

  // Release every resource when the component unmounts.
  useEffect(
    () => () => {
      if (chunks.current) {
        chunks.current = [];
      }
      if (encoder.current) {
        encoder.current.flush();
        encoder.current = undefined;
      }
      if (recorder.current) {
        // Cleanup cannot await; destroy() is intentionally fire-and-forget.
        recorder.current.destroy();
        recorder.current = undefined;
      }
      clearTimer('stop');
      if (listener.current) {
        // NOTE(review): these handlers are re-created on every render, so the
        // references captured here may differ from the ones that were
        // registered — confirm that off() actually detaches them.
        listener.current.off('speaking', handleSpeaking);
        listener.current.off('stopped_speaking', handleStoppedSpeaking);
      }
      if (stream.current) {
        stream.current.getTracks().forEach((track) => track.stop());
        stream.current = undefined;
      }
    },
    [],
  );

  useEffectAsync(async () => {
    if (autoStart) {
      await onStartRecording();
    }
  }, [autoStart]);

  /** Public: start (or resume) recording. */
  const startRecording = async () => {
    await onStartRecording();
  };

  /** Public: pause recording. */
  const pauseRecording = async () => {
    await onPauseRecording();
  };

  /** Public: stop recording and, if enabled, transcribe. */
  const stopRecording = async () => {
    await onStopRecording();
  };

  /** Acquire the stream, lazily build recorder and encoder, then record. */
  const onStartRecording = async () => {
    try {
      if (!stream.current) {
        await onStartStreaming();
      }
      if (!stream.current) {
        return; // microphone could not be acquired
      }

      if (!recorder.current) {
        const {
          default: { RecordRTCPromisesHandler, StereoAudioRecorder },
        } = await import('recordrtc');
        const recorderConfig = {
          mimeType: 'audio/wav',
          numberOfAudioChannels: 1,
          recorderType: StereoAudioRecorder,
          sampleRate: 44100,
          timeSlice: streaming ? timeSlice : undefined,
          type: 'audio',
          ondataavailable:
            autoTranscribe && streaming ? handleDataAvailable : undefined,
        };
        recorder.current = new RecordRTCPromisesHandler(
          stream.current,
          recorderConfig,
        );
      }

      if (!encoder.current) {
        const { Mp3Encoder } = await import('lamejs');
        encoder.current = new Mp3Encoder(1, 44100, 96);
      }

      const state = await recorder.current.getState();
      if (state === 'inactive' || state === 'stopped') {
        await recorder.current.startRecording();
      }
      if (state === 'paused') {
        await recorder.current.resumeRecording();
      }
      if (nonStop) {
        startTimer('stop');
      }
      setRecording(true);
    } catch (err) {
      console.error(err);
    }
  };

  /** Open the microphone and attach the speech detector (once). */
  const onStartStreaming = async () => {
    try {
      if (stream.current) {
        stream.current.getTracks().forEach((track) => track.stop());
      }
      stream.current = await navigator.mediaDevices.getUserMedia({
        audio: true,
      });

      if (!listener.current) {
        const { default: hark } = await import('hark');
        listener.current = hark(stream.current, { interval: 100, play: false });
        listener.current.on('speaking', handleSpeaking);
        listener.current.on('stopped_speaking', handleStoppedSpeaking);
      }
    } catch (err) {
      console.error(err);
    }
  };

  /** Arm the named timer unless it is already pending. */
  const startTimer = (name) => {
    if (!timeouts.current[name]) {
      timeouts.current[name] = setTimeout(onStopRecording, stopTimeout);
    }
  };

  /** Speech detected: flag it and cancel any pending auto-stop. */
  const handleSpeaking = () => {
    setSpeaking(true);
    clearTimer('stop');
  };

  /** Speech ended: clear the flag and, in nonStop mode, re-arm auto-stop. */
  const handleStoppedSpeaking = () => {
    setSpeaking(false);
    if (nonStop) {
      startTimer('stop');
    }
  };

  /** Pause the recorder if it is currently recording. */
  const onPauseRecording = async () => {
    try {
      if (!recorder.current) {
        return;
      }
      if ((await recorder.current.getState()) === 'recording') {
        await recorder.current.pauseRecording();
      }
      clearTimer('stop');
      setRecording(false);
    } catch (err) {
      console.error(err);
    }
  };

  /** Stop recording, publish the result and tear the recorder down. */
  const onStopRecording = async () => {
    try {
      if (!recorder.current) {
        return;
      }
      const state = await recorder.current.getState();
      if (state === 'recording' || state === 'paused') {
        await recorder.current.stopRecording();
      }
      onStopStreaming();
      clearTimer('stop');
      setRecording(false);

      if (autoTranscribe) {
        await onTranscribing();
      } else {
        const blob = await recorder.current.getBlob();
        setTranscript({ blob });
      }

      await recorder.current.destroy();
      chunks.current = [];
      if (encoder.current) {
        encoder.current.flush();
        encoder.current = undefined;
      }
      recorder.current = undefined;
    } catch (err) {
      console.error(err);
    }
  };

  /** Detach the speech detector and stop all microphone tracks. */
  const onStopStreaming = () => {
    if (listener.current) {
      listener.current.off('speaking', handleSpeaking);
      listener.current.off('stopped_speaking', handleStoppedSpeaking);
      listener.current = undefined;
    }
    if (stream.current) {
      stream.current.getTracks().forEach((track) => track.stop());
      stream.current = undefined;
    }
  };

  /** Cancel the named timer if it is pending. */
  const clearTimer = (name) => {
    if (timeouts.current[name]) {
      clearTimeout(timeouts.current[name]);
      timeouts.current[name] = undefined;
    }
  };

  /** Encode the finished recording and obtain its transcript. */
  const onTranscribing = async () => {
    try {
      if (!encoder.current || !recorder.current) {
        return;
      }
      if ((await recorder.current.getState()) !== 'stopped') {
        return;
      }

      setTranscribing(true);
      let blob = await recorder.current.getBlob();

      if (removeSilence) {
        const { createFFmpeg } = await import('@ffmpeg/ffmpeg');
        const ffmpeg = createFFmpeg({
          mainName: 'main',
          corePath: ffmpegCorePath,
          log: true,
        });
        try {
          if (!ffmpeg.isLoaded()) {
            await ffmpeg.load();
          }
          const wavBuffer = await blob.arrayBuffer();
          ffmpeg.FS('writeFile', 'in.wav', new Uint8Array(wavBuffer));
          await ffmpeg.run(
            '-i',
            'in.wav',
            '-acodec',
            'libmp3lame',
            '-b:a',
            '96k',
            '-ar',
            '44100',
            '-af',
            ffmpegAudioFilter,
            'out.mp3',
          );
          const mp3 = ffmpeg.FS('readFile', 'out.mp3');
          // Output this small is not transcribed; the unfiltered recording is
          // published instead. (Presumably 225 bytes is an mp3 with no audio
          // frames — TODO confirm.)
          if (mp3.length <= 225) {
            setTranscript({ blob });
            setTranscribing(false);
            return;
          }
          blob = new Blob([mp3.buffer], { type: 'audio/mpeg' });
        } finally {
          // Always release the wasm instance, including when load/run throws.
          if (ffmpeg.isLoaded()) {
            ffmpeg.exit();
          }
        }
      } else {
        const wavBuffer = await blob.arrayBuffer();
        const mp3 = encoder.current.encodeBuffer(new Int16Array(wavBuffer));
        blob = new Blob([mp3], { type: 'audio/mpeg' });
      }

      if (typeof onTranscribe === 'function') {
        const result = await onTranscribe(blob);
        setTranscript(result);
      } else {
        const file = new File([blob], 'speech.mp3', { type: 'audio/mpeg' });
        const text = await requestTranscription(file);
        setTranscript({ blob, text });
      }
      setTranscribing(false);
    } catch (err) {
      console.error(err);
      setTranscribing(false);
    }
  };

  /** Recorder slice callback used in streaming mode. */
  const handleDataAvailable = async (data) => {
    try {
      if (!streaming || !recorder.current) {
        return;
      }
      onDataAvailable?.(data);

      if (encoder.current) {
        const buffer = await data.arrayBuffer();
        const mp3 = encoder.current.encodeBuffer(new Int16Array(buffer));
        chunks.current.push(new Blob([mp3], { type: 'audio/mpeg' }));
      }

      if ((await recorder.current.getState()) === 'recording') {
        const blob = new Blob(chunks.current, { type: 'audio/mpeg' });
        const file = new File([blob], 'speech.mp3', { type: 'audio/mpeg' });
        const text = await requestTranscription(file);
        if (text) {
          setTranscript((previous) => ({ ...previous, text }));
        }
      }
    } catch (err) {
      console.error(err);
    }
  };

  /** POST the audio file to the API and resolve with the response text. */
  const requestTranscription = useMemoAsync(
    async (file) => {
      const body = new FormData();
      body.append('file', file);
      body.append('model', 'whisper-1');
      if (mode === 'transcriptions') {
        body.append('language', whisperConfig?.language ?? 'en');
      }
      if (whisperConfig?.prompt) {
        body.append('prompt', whisperConfig.prompt);
      }
      if (whisperConfig?.response_format) {
        body.append('response_format', whisperConfig.response_format);
      }
      // A plain truthiness test would silently drop an explicit temperature 0.
      const temperature = whisperConfig?.temperature;
      if (temperature || temperature === 0) {
        body.append('temperature', `${temperature}`);
      }

      const headers = {};
      headers['Content-Type'] = 'multipart/form-data';
      if (apiKey) {
        headers.Authorization = `Bearer ${apiKey}`;
      }

      const { default: axios } = await import('axios');
      const response = await axios.post(`${apiBaseUrl}${mode}`, body, {
        headers,
      });
      return response.data.text;
    },
    [apiKey, mode, whisperConfig],
  );

  return {
    recording,
    speaking,
    transcribing,
    transcript,
    pauseRecording,
    startRecording,
    stopRecording,
  };
};

export { useWhisper as a };