UNPKG

tafrigh

Version:

A Node.js library for transcribing audio/video to text.

159 lines (154 loc) 5 kB
import type {
  PreprocessingCallbacks,
  PreprocessOptions,
  SplitOnSilenceCallbacks,
  SplitOptions,
  TimeRange,
} from 'ffmpeg-simplified';
import type { Readable } from 'node:stream';

/**
 * Callbacks for monitoring and responding to various stages of the transcription process.
 * Extends the preprocessing and split-on-silence callbacks from ffmpeg-simplified.
 */
interface Callbacks extends PreprocessingCallbacks, SplitOnSilenceCallbacks {
  /**
   * Called when all transcription is complete.
   * @param {Segment[]} transcripts - Array of all transcribed segments
   * @returns {Promise<void>}
   */
  onTranscriptionFinished?: (transcripts: Segment[]) => Promise<void>;

  /**
   * Called when each individual chunk is transcribed.
   * @param {number} chunkIndex - Index of the current chunk being processed
   */
  onTranscriptionProgress?: (chunkIndex: number) => void;

  /**
   * Called just before the transcription process begins.
   * @param {number} totalChunks - Total number of chunks to be transcribed
   * @returns {Promise<void>}
   */
  onTranscriptionStarted?: (totalChunks: number) => Promise<void>;
}

/**
 * Represents a segment of transcribed audio with timing information.
 * May include detailed token-level information when available.
 */
type Segment = Token & {
  /**
   * Word-by-word breakdown of the transcription with individual timings
   */
  tokens?: Token[];
};

/**
 * Represents a token (word or phrase) in the transcription with timing information.
 */
type Token = TimeRange & {
  /**
   * Confidence score for this transcription (between 0 and 1)
   */
  confidence?: number;

  /**
   * The transcribed text
   */
  text: string;
};

/**
 * Configuration options for the transcribe function.
 */
type TranscribeOptions = {
  /**
   * Callbacks for monitoring progress and responding to events
   */
  callbacks?: Callbacks;

  /**
   * Maximum number of concurrent transcription operations.
   * Limited by the number of available API keys.
   */
  concurrency?: number;

  /**
   * Options for audio preprocessing (noise reduction, filtering, etc.)
   */
  preprocessOptions?: PreprocessOptions;

  /**
   * If true, temporary processing directories won't be deleted.
   * Useful for debugging.
   */
  preventCleanup?: boolean;

  /**
   * Number of retry attempts for failed transcription requests.
   * Uses exponential backoff.
   */
  retries?: number;

  /**
   * Options for splitting audio into chunks
   */
  splitOptions?: SplitOptions;
};

/**
 * Response structure from the Wit.ai API.
 * @internal
 */
type WitAiResponse = {
  /**
   * Confidence score for the entire transcription (between 0 and 1)
   */
  confidence?: number;

  /**
   * The transcribed text
   */
  text?: string;

  /**
   * Array of token objects with detailed timing information
   */
  tokens?: WitAiToken[];
};

/**
 * Token structure as returned by the Wit.ai API.
 * @internal
 */
type WitAiToken = TimeRange & {
  /**
   * Confidence score for this token (between 0 and 1)
   */
  confidence?: number;

  /**
   * The transcribed token text
   */
  token: string;
};

/**
 * Upper bound on chunk duration. The concrete value is not visible in this
 * declaration file (unit presumably seconds — TODO confirm against the implementation).
 */
declare const MAX_CHUNK_DURATION: number;

/**
 * Lower bound on chunk duration, declared as the literal 4
 * (unit presumably seconds — TODO confirm against the implementation).
 */
declare const MIN_CHUNK_DURATION = 4;

/**
 * Lowest concurrency value, declared as the literal 1. Presumably the floor
 * applied to `TranscribeOptions.concurrency` — confirm against the implementation.
 */
declare const MIN_CONCURRENCY = 1;

/**
 * Initializes the tafrigh library with the provided Wit.ai API keys.
 *
 * @param {Object} options - Configuration options for initialization
 * @param {string[]} options.apiKeys - Array of Wit.ai API keys to use for transcription
 * @example
 * import { init } from 'tafrigh';
 * init({ apiKeys: ['your-wit-ai-key'] });
 */
declare const init: (options: { apiKeys: string[] }) => void;

/**
 * Transcribes audio content and returns an array of transcript segments.
 *
 * This function takes an audio file (or stream) and returns a structured transcript with
 * timestamps. It handles preprocessing the audio, splitting it into chunks, and
 * transcribing each chunk using Wit.ai's API.
 *
 * @param {string | Readable} content - Path to audio file, URL, or readable stream
 * @param {TranscribeOptions} [options] - Configuration options for transcription
 * @returns {Promise<Segment[]>} - Promise resolving to an array of transcript segments
 * @throws {Error} - If transcription fails or if options validation fails
 * @example
 * import { transcribe } from 'tafrigh';
 *
 * const transcript = await transcribe('path/to/audio.mp3', {
 *   concurrency: 2,
 *   splitOptions: {
 *     chunkDuration: 60,
 *     silenceDetection: { silenceThreshold: -30 }
 *   }
 * });
 *
 * console.log(transcript);
 * // [{ text: "Hello world", start: 0, end: 2.5 }, ...]
 */
declare const transcribe: (content: Readable | string, options?: TranscribeOptions) => Promise<Segment[]>;

export {
  type Callbacks,
  MAX_CHUNK_DURATION,
  MIN_CHUNK_DURATION,
  MIN_CONCURRENCY,
  type Segment,
  type Token,
  type TranscribeOptions,
  type WitAiResponse,
  init,
  transcribe,
};