yaytt
Version:
Blazingly fast YouTube caption extractor with deduplication.
28 lines (27 loc) • 836 B
TypeScript
import { Caption } from './types.js';
/**
 * Optional settings accepted by the `CaptionDeduplicator` constructor.
 *
 * Every field may be omitted. This declaration carries no default values,
 * units, or valid ranges; the notes on each field are assumptions drawn from
 * the names alone and should be confirmed against the implementation.
 */
export interface DeduplicationOptions {
  /** Presumably switches deduplication on or off — TODO confirm. */
  enabled?: boolean;

  /**
   * Numeric threshold related to caption timing.
   * NOTE(review): unit (seconds vs. milliseconds) is not visible here — confirm.
   */
  timeThreshold?: number;

  /**
   * Numeric threshold related to text similarity.
   * NOTE(review): scale (e.g. 0–1 ratio) is not visible here — confirm.
   */
  similarityThreshold?: number;

  /** Presumably allows partially matching captions to be merged — TODO confirm. */
  mergePartialMatches?: boolean;

  /** Presumably enables a stricter cleanup pass — TODO confirm. */
  aggressiveMode?: boolean;
}
/**
 * Type declaration for the caption deduplicator.
 *
 * The public surface is the constructor and `deduplicate`. All other members
 * are private; this declaration exposes only their names (their signatures
 * are erased), so their behavior cannot be documented from here.
 */
export declare class CaptionDeduplicator {
  /**
   * Private state; its type is erased in this declaration.
   * Presumably holds the settings passed to the constructor — TODO confirm.
   */
  private options;

  /**
   * @param options Optional deduplication settings; the argument may be
   *   omitted entirely.
   */
  constructor(options?: DeduplicationOptions);

  /**
   * Processes a list of captions and returns a list of captions.
   *
   * @param captions Captions to process.
   * @returns The resulting captions.
   *   NOTE(review): whether the input array is mutated or a new array is
   *   returned is not visible in this declaration — confirm before relying
   *   on either.
   */
  deduplicate(captions: Caption[]): Caption[];

  // Private helpers, in declaration order. Only the names are visible here.
  private groupSimilarCaptions;
  private areSimilarCaptions;
  private removeTextContinuations;
  private isPartialRepeat;
  private finalCleanup;
  private ultraAggressiveCleanup;
  private isContinuation;
  private hasSignificantWordOverlap;
  private mergeCaptions;
  private removeRedundantCaptions;
  private calculateSimilarity;
  private levenshteinDistance;
  private cleanText;
  private isMoreComplete;
}