defuddle
Version:
Extract article content and metadata from web pages.
38 lines (37 loc) • 1.25 kB
TypeScript
/**
* Standardized transcript HTML and text construction.
*
* Used by YouTube (and potentially other video/audio extractors)
* to produce consistent transcript markup.
*/
/**
* One timed piece of transcript text, as accepted by `buildTranscript`
* in its `segments` array.
*/
export interface TranscriptSegment {
/** Start time in seconds */
start: number;
/**
* Segment text (plain, not HTML-escaped).
* NOTE(review): escaping presumably happens when the HTML is built — confirm against the implementation.
*/
text: string;
/** Whether this segment starts a new speaker turn */
speakerChange: boolean;
/** Speaker index (0, 1, ...) for CSS classes. Optional; may be omitted. */
speaker?: number;
}
/**
* A chapter heading with its start time, as accepted by `buildTranscript`
* in its optional `chapters` array.
*/
export interface TranscriptChapter {
/** Chapter heading text */
title: string;
/** Start time in seconds */
start: number;
}
/**
* Return value of `buildTranscript`: the same transcript in two forms.
*/
export interface TranscriptResult {
/** Transcript as HTML markup */
html: string;
/** Transcript as text */
text: string;
}
/**
* Format seconds as a human-readable timestamp (M:SS or H:MM:SS).
*
* NOTE(review): presumably the H:MM:SS form is used once the value reaches
* one hour; the declaration alone does not show this — confirm against the
* implementation.
*
* @param seconds - Time offset in seconds
* @returns The formatted timestamp string
*/
export declare function formatTimestamp(seconds: number): string;
/**
* Build transcript HTML and text from segments and optional chapters.
*
* @param site - Site identifier for wrapper class (e.g. "youtube")
* @param segments - Grouped transcript segments with timestamps and speaker info
* @param chapters - Optional chapter headings with start times; may be omitted
* @returns The transcript as both an HTML string and a text string
*/
export declare function buildTranscript(site: string, segments: TranscriptSegment[], chapters?: TranscriptChapter[]): TranscriptResult;