UNPKG

tafrigh

Version:

A NodeJS library for transcribing audio/video to text.

221 lines (219 loc) 6.99 kB
import type { ReadStream } from "node:fs";
import type {
  AudioChunk,
  PreprocessOptions,
  PreprocessingCallbacks,
  SplitOnSilenceCallbacks,
  SplitOptions,
  TimeRange,
} from "ffmpeg-simplified";

//#region src/types.d.ts
/**
 * Callbacks for monitoring and responding to various stages of the transcription process.
 * Extends preprocessing and splitting callbacks from ffmpeg-simplified.
 */
interface Callbacks extends PreprocessingCallbacks, SplitOnSilenceCallbacks {
  /**
   * Called when all transcription is complete.
   * @param transcripts - Array of all transcribed segments
   */
  onTranscriptionFinished?: (transcripts: Segment[]) => Promise<void>;
  /**
   * Called when each individual chunk is transcribed.
   * @param chunkIndex - Index of the current chunk being processed
   */
  onTranscriptionProgress?: (chunkIndex: number) => void;
  /**
   * Called just before the transcription process begins.
   * @param totalChunks - Total number of chunks to be transcribed
   */
  onTranscriptionStarted?: (totalChunks: number) => Promise<void>;
}
/**
 * Represents a segment of transcribed audio with timing information.
 * May include detailed token-level information when available.
 */
type Segment = Token & {
  /**
   * Word-by-word breakdown of the transcription with individual timings
   */
  tokens?: Token[];
};
/**
 * Represents a token (word or phrase) in the transcription with timing information.
 */
type Token = TimeRange & {
  /**
   * Confidence score for this transcription (between 0 and 1)
   */
  confidence?: number;
  /**
   * The transcribed text
   */
  text: string;
};
/**
 * Configuration options for the transcribe function.
 */
type TranscribeOptions = {
  /**
   * Callbacks for monitoring progress and responding to events
   */
  callbacks?: Callbacks;
  /**
   * Maximum number of concurrent transcription operations
   * Limited by the number of available API keys
   */
  concurrency?: number;
  /**
   * Options for audio preprocessing (noise reduction, filtering, etc.)
   */
  preprocessOptions?: PreprocessOptions;
  /**
   * If true, temporary processing directories won't be deleted
   * Useful for debugging
   */
  preventCleanup?: boolean;
  /**
   * Number of retry attempts for failed transcription requests
   * Uses exponential backoff
   */
  retries?: number;
  /**
   * Options for splitting audio into chunks
   */
  splitOptions?: SplitOptions;
};
/**
 * Response structure from the Wit.ai API.
 * @internal
 */
type WitAiResponse = {
  /**
   * Confidence score for the entire transcription (between 0 and 1)
   */
  confidence?: number;
  /**
   * The transcribed text
   */
  text?: string;
  /**
   * Array of token objects with detailed timing information
   */
  tokens?: WitAiToken[];
};
/**
 * Token structure as returned by the Wit.ai API.
 * @internal
 */
type WitAiToken = TimeRange & {
  /**
   * Confidence score for this token (between 0 and 1)
   */
  confidence?: number;
  /**
   * The transcribed token text
   */
  token: string;
};
//#endregion
//#region src/errors.d.ts
/**
 * Record of a single chunk whose transcription did not succeed.
 */
type FailedTranscription = {
  /** The audio chunk that could not be transcribed */
  chunk: AudioChunk;
  /** The value thrown or rejected for this chunk; narrow before use */
  error: unknown;
  /** Index associated with the chunk (presumably its position in the chunk list; confirm against the implementation) */
  index: number;
};
/**
 * Details supplied when constructing a {@link TranscriptionError}.
 */
type TranscriptionErrorOptions = {
  /** Audio chunks involved in the run, when known */
  chunkFiles?: AudioChunk[];
  /** Per-chunk failure records */
  failures: FailedTranscription[];
  /** Directory associated with the run (presumably the temporary processing directory; confirm), when known */
  outputDir?: string;
  /** Segments that were transcribed */
  transcripts: Segment[];
};
/**
 * Error carrying transcription failures together with the segments that were
 * produced. Its `failures` and `transcripts` can be passed to
 * {@link resumeFailedTranscriptions}.
 */
declare class TranscriptionError extends Error {
  /** Audio chunks involved in the run */
  readonly chunkFiles: AudioChunk[];
  /** Per-chunk failure records */
  readonly failures: FailedTranscription[];
  /** Directory associated with the run, when known */
  readonly outputDir?: string;
  /** Segments that were transcribed */
  readonly transcripts: Segment[];
  /**
   * @param message - Human-readable error message
   * @param options - Failure details attached to the error
   */
  constructor(message: string, { chunkFiles, failures, outputDir, transcripts }: TranscriptionErrorOptions);
  /** Audio chunks reported as failed (presumably derived from `failures`; confirm against the implementation) */
  get failedChunks(): AudioChunk[];
  /** Whether this error reports any failures (presumably true when `failures` is non-empty; confirm) */
  get hasFailures(): boolean;
}
//#endregion
//#region src/transcriber.d.ts
/**
 * Options for the transcribeAudioChunks function.
 */
type TranscribeAudioChunksOptions = {
  /** Callback functions for progress reporting */
  callbacks?: Callbacks;
  /** Maximum number of concurrent transcription operations */
  concurrency?: number;
  /** Number of retry attempts for failed requests */
  retries?: number;
};
/**
 * Outcome of transcribing a batch of audio chunks: the segments that were
 * produced plus a record for every chunk that failed.
 *
 * The internal chunk transcriber (not declared in this file) is described by
 * the library as running either sequentially or concurrently, choosing the
 * concurrency from the available API keys and the requested limit.
 * @internal
 */
type TranscribeAudioChunksResult = {
  /** Per-chunk failure records */
  failures: FailedTranscription[];
  /** Transcribed segments */
  transcripts: Segment[];
};
/**
 * Options accepted by {@link resumeFailedTranscriptions}; same fields as
 * {@link TranscribeAudioChunksOptions}.
 */
type ResumeOptions = Pick<TranscribeAudioChunksOptions, 'callbacks' | 'concurrency' | 'retries'>;
/**
 * Resumes transcription from a previous {@link TranscriptionError}.
 *
 * Presumably re-attempts only the chunks listed in `error.failures` and merges
 * the new segments with `error.transcripts`; confirm against the implementation.
 *
 * @param error - The `failures` and `transcripts` of an earlier failed run
 * @param options - Callbacks, concurrency and retry settings for the new attempt
 * @returns The resulting transcripts together with any failures still outstanding
 */
declare const resumeFailedTranscriptions: (error: Pick<TranscriptionError, "failures" | "transcripts">, options?: ResumeOptions) => Promise<TranscribeAudioChunksResult>;
//#endregion
//#region src/utils/constants.d.ts
/** Largest supported chunk duration (value not visible in this declaration; units presumably seconds; confirm) */
declare const MAX_CHUNK_DURATION: number;
/** Smallest supported chunk duration (units presumably seconds; confirm) */
declare const MIN_CHUNK_DURATION = 4;
/** Smallest supported `concurrency` value */
declare const MIN_CONCURRENCY = 1;
//#endregion
//#region src/index.d.ts
/**
 * Initializes the tafrigh library with the provided Wit.ai API keys.
 *
 * @param options - Configuration options for initialization
 * @param options.apiKeys - Array of Wit.ai API keys to use for transcription
 * @example
 * import { init } from 'tafrigh';
 * init({ apiKeys: ['your-wit-ai-key'] });
 */
declare const init: (options: {
  apiKeys: string[];
}) => void;
/**
 * Transcribes audio content and returns an array of transcript segments.
 *
 * This function takes an audio file (or stream) and returns a structured transcript with
 * timestamps. It handles preprocessing the audio, splitting it into chunks, and
 * transcribing each chunk using Wit.ai's API.
 *
 * @param content - Path to audio file, URL, or readable stream
 * @param options - Configuration options for transcription
 * @returns Promise resolving to an array of transcript segments
 * @throws {Error} - If transcription fails or if options validation fails
 * @example
 * import { transcribe } from 'tafrigh';
 *
 * const transcript = await transcribe('path/to/audio.mp3', {
 *   concurrency: 2,
 *   splitOptions: {
 *     chunkDuration: 60,
 *     silenceDetection: { silenceThreshold: -30 }
 *   }
 * });
 *
 * console.log(transcript);
 * // [{ text: "Hello world", start: 0, end: 2.5 }, ...]
 */
declare const transcribe: (content: ReadStream | string, options?: TranscribeOptions) => Promise<Segment[]>;
//#endregion
export { Callbacks, FailedTranscription, MAX_CHUNK_DURATION, MIN_CHUNK_DURATION, MIN_CONCURRENCY, Segment, Token, TranscribeOptions, TranscriptionError, WitAiResponse, init, resumeFailedTranscriptions, transcribe };
//# sourceMappingURL=index.d.mts.map