UNPKG

tafrigh

Version:

A Node.js library for transcribing audio/video to text.

2 lines 4.48 kB
import { formatMedia, splitFileOnSilences } from "ffmpeg-simplified";
import fs, { promises as fsPromises } from "node:fs";
import path from "node:path";
import process from "node:process";
import pino from "pino";
import PQueue from "p-queue";
import { setTimeout as sleep } from "node:timers/promises";
import JSONStream from "jsonstream-next";
import https from "node:https";

/**
 * Module-wide logger. The hostname and pid base fields are explicitly set to
 * undefined, and the level comes from LOG_LEVEL (falling back to "info").
 */
const logger = pino({
  base: { hostname: undefined, pid: undefined },
  level: process.env.LOG_LEVEL || "info",
});

// ---------------------------------------------------------------------------
// Wit.ai API key rotation
// ---------------------------------------------------------------------------

/** Space-separated keys read once from WIT_AI_API_KEYS; replaceable through init(). */
const apiKeys = process.env.WIT_AI_API_KEYS ? process.env.WIT_AI_API_KEYS.split(" ") : [];

/** Index of the key that the next getNextApiKey() call hands out. */
let nextKeyIndex = 0;

/** @returns {number} How many API keys are currently configured. */
const getApiKeysCount = () => apiKeys.length;

/**
 * Ensures at least one API key is configured.
 *
 * @throws {Error} "Empty wit.ai API keys" when the key list is empty.
 */
const validateApiKeys = () => {
  if (getApiKeysCount() === 0) {
    logger.error("At least one Wit.ai API key is required. Please set them in your environment variables.");
    throw new Error("Empty wit.ai API keys");
  }
};

/**
 * Returns the next key in round-robin order and advances the cursor.
 *
 * @returns {string} The API key to use for the next request.
 * @throws {Error} When no keys are configured.
 */
const getNextApiKey = () => {
  validateApiKeys();
  const key = apiKeys[nextKeyIndex];
  nextKeyIndex = (nextKeyIndex + 1) % apiKeys.length;
  return key;
};

/**
 * Replaces the configured keys in place.
 *
 * @param {string[]} keys New list of API keys.
 * @throws {Error} When the new list is empty.
 */
const setApiKeys = (keys) => {
  apiKeys.length = 0;
  apiKeys.push(...keys);
  // The cursor may point past the end of a shorter list; without this reset the
  // next getNextApiKey() call could return undefined.
  nextKeyIndex = 0;
  validateApiKeys();
};

// ---------------------------------------------------------------------------
// Response mapping
// ---------------------------------------------------------------------------

/**
 * Converts an accumulated dictation result into a segment positioned on the
 * timeline of the chunk it came from.
 *
 * Token start/end values are divided by 1000 and shifted by the chunk's start
 * (presumably milliseconds -> seconds; confirm against the Wit.ai response).
 * Tokens without a `token` value are dropped. When no tokens remain, the
 * chunk's own start/end are used for the segment.
 *
 * @param {{text: string, confidence?: number, tokens?: Array<object>}} response
 * @param {{start: number, end: number}} range Range of the source chunk.
 * @returns {{start: number, end: number, text: string, confidence?: number, tokens?: Array<object>}}
 */
const mapWitResponseToSegment = (response, { end, start }) => {
  const tokens = (response.tokens || [])
    .filter((item) => item.token)
    .map((item) => ({
      ...(item.confidence && { confidence: item.confidence }),
      end: item.end / 1000 + start,
      start: item.start / 1000 + start,
      text: item.token,
    }));

  return {
    ...(response.confidence && { confidence: response.confidence }),
    end: tokens.at(-1)?.end ?? end,
    start: tokens[0]?.start ?? start,
    text: response.text.trim(),
    ...(tokens.length > 0 && { tokens }),
  };
};

// ---------------------------------------------------------------------------
// Retry helper
// ---------------------------------------------------------------------------

const DEFAULT_RETRIES = 5;
const DEFAULT_BASE_DELAY_MS = 1000;

/**
 * Runs `fn`, retrying on failure with a delay that doubles after each attempt
 * (baseDelay, 2*baseDelay, 4*baseDelay, ...).
 *
 * @template T
 * @param {() => T | Promise<T>} fn Operation to attempt.
 * @param {number} [retries=5] Maximum number of attempts.
 * @param {number} [baseDelay=1000] Delay before the second attempt, in ms.
 * @returns {Promise<T>} The first successful result.
 * @throws The error of the last attempt once all attempts are used up, or a
 *   generic Error when `retries` is less than 1 and nothing was attempted.
 */
const exponentialBackoffRetry = async (fn, retries = DEFAULT_RETRIES, baseDelay = DEFAULT_BASE_DELAY_MS) => {
  for (let attempt = 1; attempt <= retries; attempt++) {
    try {
      return await fn();
    } catch (error) {
      if (attempt >= retries) {
        logger.error(`All ${retries} attempts failed.`);
        throw error;
      }
      const delay = baseDelay * Math.pow(2, attempt - 1);
      logger.warn(`Attempt ${attempt} failed due to ${String(error)}. Retrying in ${delay}ms...`);
      await sleep(delay);
    }
  }
  throw new Error("Exponential backoff failed unexpectedly");
};

// ---------------------------------------------------------------------------
// Wit.ai dictation request
// ---------------------------------------------------------------------------

/** Marker value that closes one finished utterance in the streamed response. */
const FINAL_TRANSCRIPTION = "FINAL_TRANSCRIPTION";

/**
 * Builds the authorization and content-type headers for an upload.
 * Files ending in .mp3, .m4a or .mp4 are sent as "audio/mpeg3"; everything
 * else is sent as "audio/wav".
 *
 * @param {string} filePath Path of the file being uploaded.
 * @param {{apiKey: string}} options
 * @returns {Record<string, string>}
 */
const getRequestHeaders = (filePath, options) => {
  const headers = {
    Authorization: `Bearer ${options.apiKey}`,
    "Content-Type": "audio/wav",
  };
  const isMpeg = [".mp3", ".m4a", ".mp4"].some((extension) => filePath.endsWith(extension));
  if (isMpeg) {
    headers["Content-Type"] = "audio/mpeg3";
  }
  return headers;
};

/**
 * Streams a file to the Wit.ai dictation endpoint and accumulates every
 * finalized utterance of the streamed JSON response into a single result.
 *
 * @param {string} filePath Audio file to upload.
 * @param {{apiKey: string}} options
 * @returns {Promise<{text: string, tokens: Array<object>, confidence?: number}>}
 *   Rejects on a non-200 status, on a request/response/parse error, or when
 *   the file cannot be read.
 */
async function dictation(filePath, options) {
  const fileStream = fs.createReadStream(filePath);
  const requestOptions = {
    headers: { ...getRequestHeaders(filePath, options), Accept: "application/vnd.wit.20200513+json" },
    hostname: "api.wit.ai",
    method: "POST",
    path: "/dictation?v=20240304",
  };

  return new Promise((resolve, reject) => {
    const request = https.request(requestOptions, (response) => {
      if (response.statusCode !== 200) {
        // Drain the body so the underlying socket is released.
        response.resume();
        reject(new Error(`HTTP error! status: ${response.statusCode}`));
        return;
      }

      const result = { text: "", tokens: [] };
      let current = {};
      const parser = JSONStream.parse("*");

      // pipe() does not forward errors, so observe the response stream itself.
      response.on("error", reject);
      response.pipe(parser);

      // The parser emits each top-level value separately. NOTE(review): this
      // relies on the order in which the service emits the fields of each
      // object (a `true` flag first, the type marker last) - confirm against
      // the Wit.ai dictation documentation before reordering these branches.
      parser.on("data", (value) => {
        if (value === true) {
          current = {};
        } else if (value === FINAL_TRANSCRIPTION) {
          result.tokens?.push(...(current.tokens || []));
          result.text += ` ${current.text}`;
          result.confidence = current.confidence;
        } else if (typeof value === "string") {
          current.text = value;
        } else if (value && typeof value === "object") {
          Object.assign(current, value);
        }
      });
      parser.on("end", () => {
        resolve(result);
      });
      parser.on("error", reject);
    });

    request.on("error", (error) => {
      fileStream.destroy();
      reject(error);
    });
    // Without a listener, a missing or unreadable file would surface as an
    // uncaught exception instead of a rejection the retry logic can handle.
    fileStream.on("error", (error) => {
      request.destroy();
      reject(error);
    });
    fileStream.pipe(request);
  });
}

// ---------------------------------------------------------------------------
// Chunk transcription
// ---------------------------------------------------------------------------

/**
 * Obscures an API key for logging: keeps the first three characters, the
 * middle character and the last three characters.
 *
 * @param {string} key
 * @returns {string}
 */
const maskApiKey = (key) => {
  const middle = key[Math.floor(key.length / 2)];
  return `${key.slice(0, 3)}*****${middle}*****${key.slice(-3)}`;
};

/**
 * Transcribes one chunk, retrying with a freshly rotated API key on each
 * attempt, then reports progress through the optional callback.
 *
 * @param {{filename: string, range: {start: number, end: number}}} chunk
 * @param {number} index Position of the chunk in the input list.
 * @param {object} [callbacks] May provide onTranscriptionProgress(index).
 * @param {number} [retries] Maximum attempts (helper default when undefined).
 * @returns {Promise<object|null>} The mapped segment, or null when the
 *   response contained no text.
 */
const requestNextTranscript = async (chunk, index, callbacks, retries) => {
  const response = await exponentialBackoffRetry(() => {
    const apiKey = getNextApiKey();
    logger.info(`Calling dictation for ${chunk.filename} with key ${maskApiKey(apiKey)}`);
    return dictation(chunk.filename, { apiKey });
  }, retries);

  if (callbacks?.onTranscriptionProgress) {
    callbacks.onTranscriptionProgress(index);
  }

  return response.text?.trim() ? mapWitResponseToSegment(response, chunk.range) : null;
};

/**
 * Transcribes the chunks one after another, in input order.
 *
 * @param {Array<object>} chunks
 * @param {object} [callbacks] May provide onTranscriptionFinished(transcripts).
 * @param {number} [retries]
 * @returns {Promise<Array<object>>} Segments of all non-empty chunks.
 */
const transcribeAudioChunksInSingleThread = async (chunks, callbacks, retries) => {
  const transcripts = [];
  logger.debug(`transcribeAudioChunksInSingleThread for ${chunks.length}`);

  for (const [index, chunk] of chunks.entries()) {
    const transcript = await requestNextTranscript(chunk, index, callbacks, retries);
    if (transcript) {
      transcripts.push(transcript);
      logger.trace(`Transcript received for chunk: ${chunk.filename}`);
    } else {
      logger.warn("Skipping empty transcript");
    }
  }

  if (callbacks?.onTranscriptionFinished) {
    await callbacks.onTranscriptionFinished(transcripts);
  }
  return transcripts;
};

/**
 * Transcribes the chunks through a queue with bounded concurrency and returns
 * the segments sorted by start time.
 *
 * @param {Array<object>} chunks
 * @param {number} concurrency Maximum number of simultaneous requests.
 * @param {object} [callbacks] May provide onTranscriptionFinished(transcripts).
 * @param {number} [retries]
 * @returns {Promise<Array<object>>} Segments of all non-empty chunks.
 * @throws The first chunk failure; tasks that have not started are dropped.
 */
const transcribeAudioChunksWithConcurrency = async (chunks, concurrency, callbacks, retries) => {
  logger.debug(`transcribeAudioChunksWithConcurrency ${concurrency}`);
  const transcripts = [];
  const queue = new PQueue({ concurrency });

  const processChunk = async (index, chunk) => {
    const transcript = await requestNextTranscript(chunk, index, callbacks, retries);
    if (transcript) {
      transcripts.push(transcript);
      logger.trace(`Transcript received for chunk: ${chunk.filename}`);
    } else {
      logger.warn("Skipping empty transcript");
    }
  };

  // Keep the promises returned by add(): dropping them would turn a failed
  // chunk into an unhandled rejection instead of an error for the caller.
  const tasks = chunks.map((chunk, index) => queue.add(() => processChunk(index, chunk)));
  try {
    await Promise.all(tasks);
  } catch (error) {
    // Do not start further uploads once the overall operation has failed.
    queue.clear();
    throw error;
  }

  transcripts.sort((a, b) => a.start - b.start);
  if (callbacks?.onTranscriptionFinished) {
    await callbacks.onTranscriptionFinished(transcripts);
  }
  return transcripts;
};

/**
 * Chooses between sequential and concurrent transcription. The effective
 * concurrency never exceeds the number of configured API keys.
 *
 * @param {Array<object>} chunks
 * @param {{callbacks?: object, concurrency?: number, retries?: number}} [options]
 * @returns {Promise<Array<object>>}
 */
const transcribeAudioChunks = async (chunks, { callbacks, concurrency = 1, retries } = {}) => {
  const keyCount = getApiKeysCount();
  const effectiveConcurrency = concurrency && concurrency <= keyCount ? concurrency : keyCount;

  if (callbacks?.onTranscriptionStarted) {
    await callbacks.onTranscriptionStarted(chunks.length);
  }

  if (chunks.length === 1 || concurrency === 1) {
    return transcribeAudioChunksInSingleThread(chunks, callbacks, retries);
  }
  return transcribeAudioChunksWithConcurrency(chunks, effectiveConcurrency, callbacks, retries);
};

// ---------------------------------------------------------------------------
// Option validation
// ---------------------------------------------------------------------------

/** Largest accepted splitOptions.chunkDuration. */
const MAX_CHUNK_DURATION = 300;
/** Smallest accepted splitOptions.chunkDuration. */
const MIN_CHUNK_DURATION = 4;
/** Smallest accepted concurrency. */
const MIN_CONCURRENCY = 1;

/**
 * Validates the user-supplied transcription options.
 *
 * @param {object} [options]
 * @throws {Error} When chunkDuration is outside [MIN_CHUNK_DURATION,
 *   MAX_CHUNK_DURATION] or when a truthy concurrency is below MIN_CONCURRENCY.
 */
const validateTranscribeFileOptions = (options) => {
  const chunkDuration = options?.splitOptions?.chunkDuration;
  if (chunkDuration) {
    if (chunkDuration < MIN_CHUNK_DURATION) {
      throw new Error(`chunkDuration=${chunkDuration} cannot be less than ${MIN_CHUNK_DURATION}s`);
    }
    if (chunkDuration > MAX_CHUNK_DURATION) {
      throw new Error(`chunkDuration=${chunkDuration} cannot be greater than ${MAX_CHUNK_DURATION}s`);
    }
  }

  if (options?.concurrency && options.concurrency < MIN_CONCURRENCY) {
    throw new Error(`concurrency=${options.concurrency} must be a positive integer.`);
  }
};

// ---------------------------------------------------------------------------
// Public API
// ---------------------------------------------------------------------------

/**
 * Configures the Wit.ai API keys used for all subsequent requests.
 *
 * @param {{apiKeys: string[]}} options
 * @throws {Error} When `apiKeys` is empty.
 */
const init = (options) => {
  setApiKeys(options.apiKeys);
};

/**
 * Transcribes a media input: converts it to mp3 inside a fresh working
 * directory, splits it on silences and transcribes every resulting chunk.
 *
 * The working directory is removed afterwards, on success and on failure,
 * unless `options.preventCleanup` is set.
 *
 * @param {*} content Media input forwarded unchanged to formatMedia.
 * @param {object} [options] preprocessOptions, splitOptions, callbacks,
 *   concurrency, retries and preventCleanup.
 * @returns {Promise<Array<object>>} Transcribed segments ([] when the split
 *   produced no chunks).
 * @throws {Error} On invalid options or when any processing step fails.
 */
const transcribe = async (content, options) => {
  logger.info(options, `transcribe ${content} (${typeof content})`);
  validateTranscribeFileOptions(options);

  const outputDir = await fsPromises.mkdtemp("tafrigh");
  logger.debug(`Using ${outputDir}`);

  try {
    const targetPath = path.format({ dir: outputDir, ext: ".mp3", name: Date.now().toString() });
    const filePath = await formatMedia(content, targetPath, options?.preprocessOptions, options?.callbacks);
    const chunkFiles = await splitFileOnSilences(filePath, outputDir, options?.splitOptions, options?.callbacks);

    const transcripts = chunkFiles.length
      ? await transcribeAudioChunks(chunkFiles, {
          callbacks: options?.callbacks,
          concurrency: options?.concurrency,
          retries: options?.retries,
        })
      : [];

    logger.debug(chunkFiles, "Generated chunks");
    return transcripts;
  } finally {
    // Runs on failure too, so an aborted transcription does not leave its
    // working directory behind.
    if (!options?.preventCleanup) {
      logger.info(`Cleaning up ${outputDir}`);
      await fsPromises.rm(outputDir, { recursive: true });
    }
  }
};

export { MAX_CHUNK_DURATION, MIN_CHUNK_DURATION, MIN_CONCURRENCY, init, transcribe };