tafrigh
Version:
A Node.js library for transcribing audio/video to text.
159 lines (154 loc) • 5 kB
TypeScript
import { PreprocessingCallbacks, SplitOnSilenceCallbacks, TimeRange, PreprocessOptions, SplitOptions } from 'ffmpeg-simplified';
import { Readable } from 'node:stream';
/**
 * Callbacks for monitoring and responding to the stages of the transcription process.
 * Inherits the preprocessing and split-on-silence callbacks from ffmpeg-simplified and
 * adds the transcription-specific hooks below. Every hook is optional.
 */
interface Callbacks extends PreprocessingCallbacks, SplitOnSilenceCallbacks {
  /**
   * Called when all transcription is complete.
   * @param transcripts - Array of all transcribed segments
   * @returns A promise; the hook may be asynchronous
   */
  onTranscriptionFinished?: (transcripts: Segment[]) => Promise<void>;
  /**
   * Called when each individual chunk is transcribed.
   * @param chunkIndex - Index of the current chunk being processed
   */
  onTranscriptionProgress?: (chunkIndex: number) => void;
  /**
   * Called just before the transcription process begins.
   * @param totalChunks - Total number of chunks to be transcribed
   * @returns A promise; the hook may be asynchronous
   */
  onTranscriptionStarted?: (totalChunks: number) => Promise<void>;
}
/**
 * A transcribed piece of audio: a {@link Token} (text plus timing information)
 * that may also carry the finer-grained tokens it is made of, when available.
 */
type Segment = Token & {
  /** Word-by-word breakdown of the transcription, each entry with its own timings. */
  tokens?: Array<Token>;
};
/**
 * A single token (word or phrase) of the transcription, combined with the
 * timing information contributed by `TimeRange`.
 */
type Token = TimeRange & {
  /** How confident the transcription is for this token, between 0 and 1. */
  confidence?: number;
  /** The text that was transcribed. */
  text: string;
};
/**
 * Settings accepted by the transcribe function. All fields are optional.
 */
type TranscribeOptions = {
  /** Hooks for observing progress and reacting to events. */
  callbacks?: Callbacks;
  /**
   * Upper limit on transcription operations running at the same time.
   * Bounded by how many API keys are available.
   */
  concurrency?: number;
  /** Audio preprocessing settings (noise reduction, filtering, etc.). */
  preprocessOptions?: PreprocessOptions;
  /**
   * When true, the temporary processing directories are kept instead of deleted.
   * Handy while debugging.
   */
  preventCleanup?: boolean;
  /**
   * How many times a failed transcription request is retried.
   * Retries use exponential backoff.
   */
  retries?: number;
  /** Settings that control how the audio is split into chunks. */
  splitOptions?: SplitOptions;
};
/**
 * Shape of a response returned by the Wit.ai API.
 * @internal
 */
type WitAiResponse = {
  /** Confidence for the transcription as a whole, between 0 and 1. */
  confidence?: number;
  /** The text that was transcribed. */
  text?: string;
  /** Token objects carrying detailed timing information. */
  tokens?: Array<WitAiToken>;
};
/**
 * A token in the form the Wit.ai API returns it, combined with the timing
 * information contributed by `TimeRange`.
 * @internal
 */
type WitAiToken = TimeRange & {
  /** Confidence for this individual token, between 0 and 1. */
  confidence?: number;
  /** Text of the transcribed token. */
  token: string;
};
/**
 * Judging by its name, the upper bound for a chunk's duration. Only the type
 * (`number`) is declared here; the concrete value is not visible in this file.
 * NOTE(review): presumably in seconds and related to `splitOptions.chunkDuration` — confirm against the implementation.
 */
declare const MAX_CHUNK_DURATION: number;
/**
 * Judging by its name, the lower bound for a chunk's duration; declared as the literal 4.
 * NOTE(review): presumably in seconds — confirm against the implementation.
 */
declare const MIN_CHUNK_DURATION = 4;
/**
 * Judging by its name, the lower bound for concurrency; declared as the literal 1.
 * NOTE(review): presumably the smallest accepted `TranscribeOptions.concurrency` — confirm against the implementation.
 */
declare const MIN_CONCURRENCY = 1;
/**
 * Sets up the tafrigh library with the Wit.ai API keys it should use.
 *
 * @param options - Initialization settings
 * @param options.apiKeys - The Wit.ai API keys to use for transcription
 * @example
 * import { init } from 'tafrigh';
 * init({ apiKeys: ['your-wit-ai-key'] });
 */
declare const init: (options: { apiKeys: string[] }) => void;
/**
 * Transcribes audio content into an array of timestamped transcript segments.
 *
 * Given an audio file (or stream), it preprocesses the audio, splits it into
 * chunks, and transcribes each chunk through Wit.ai's API, producing a
 * structured transcript.
 *
 * @param content - Path to audio file, URL, or readable stream
 * @param options - Optional configuration for the transcription
 * @returns Promise resolving to an array of transcript segments
 * @throws {Error} If transcription fails or if options validation fails
 * @example
 * import { transcribe } from 'tafrigh';
 *
 * const transcript = await transcribe('path/to/audio.mp3', {
 *   concurrency: 2,
 *   splitOptions: {
 *     chunkDuration: 60,
 *     silenceDetection: { silenceThreshold: -30 }
 *   }
 * });
 *
 * console.log(transcript);
 * // [{ text: "Hello world", start: 0, end: 2.5 }, ...]
 */
declare const transcribe: (
  content: Readable | string,
  options?: TranscribeOptions,
) => Promise<Segment[]>;
// Public surface of the package: runtime values plus type-only exports.
export {
  type Callbacks,
  MAX_CHUNK_DURATION,
  MIN_CHUNK_DURATION,
  MIN_CONCURRENCY,
  type Segment,
  type Token,
  type TranscribeOptions,
  type WitAiResponse,
  init,
  transcribe,
};