UNPKG

ai-youtube-transcript

Version:

Fetch and process transcripts from YouTube videos with support for multiple languages, translation, and formatting

github.com/vishnumishra/ai-youtube-transcript

vishnumishra/ai-youtube-transcript

974 lines (953 loc) • 36 kB

JavaScript

#!/usr/bin/env node
'use strict';

var fs = require('fs');

/*! *****************************************************************************
Copyright (c) Microsoft Corporation.

Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted.

THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
***************************************************************************** */

/**
 * Compiler-emitted helper that drives a generator function as an async
 * function: every yielded value is adopted as a promise and its settlement is
 * fed back into the generator.
 *
 * @param thisArg - The `this` value the generator body runs with
 * @param _arguments - Arguments forwarded to the generator function
 * @param P - Promise constructor to use (defaults to the global Promise)
 * @param generator - The generator function to run
 * @returns A promise for the generator's return value
 */
function __awaiter(thisArg, _arguments, P, generator) {
    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
    return new (P || (P = Promise))(function (resolve, reject) {
        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
}

/**
 * Base error class for YouTube transcript errors
 */
class YoutubeTranscriptError extends Error {
    constructor(message) {
        super(`[YoutubeTranscript] 🚨 ${message}`);
        this.name = 'YoutubeTranscriptError';
    }
}

/**
 * Error thrown when YouTube is receiving too many requests
 */
class YoutubeTranscriptTooManyRequestError extends YoutubeTranscriptError {
    constructor() {
        super('YouTube is receiving too many requests from this IP and now requires solving a captcha to continue');
        this.name = 'YoutubeTranscriptTooManyRequestError';
    }
}

/**
 * Error thrown when the video is no longer available
 */
class YoutubeTranscriptVideoUnavailableError extends YoutubeTranscriptError {
    constructor(videoId) {
        super(`The video is no longer available (${videoId})`);
        this.name = 'YoutubeTranscriptVideoUnavailableError';
    }
}

/**
 * Error thrown when transcript is disabled on the video
 */
class YoutubeTranscriptDisabledError extends YoutubeTranscriptError {
    constructor(videoId) {
        super(`Transcript is disabled on this video (${videoId})`);
        this.name = 'YoutubeTranscriptDisabledError';
    }
}

/**
 * Error thrown when no transcripts are available for the video
 */
class YoutubeTranscriptNotAvailableError extends YoutubeTranscriptError {
    constructor(videoId) {
        super(`No transcripts are available for this video (${videoId})`);
        this.name = 'YoutubeTranscriptNotAvailableError';
    }
}

/**
 * Error thrown when the transcript cannot be translated
 */
class YoutubeTranscriptTranslationError extends YoutubeTranscriptError {
    constructor(message) {
        super(message);
        this.name = 'YoutubeTranscriptTranslationError';
    }
}

/**
 * Constants used throughout the application
 */
class Constants {
}
/**
 * Regular expression to extract YouTube video ID from URL
 */
Constants.RE_YOUTUBE = /(?:youtube\.com\/(?:[^\/]+\/.+\/|(?:v|e(?:mbed)?)\/|.*[?&]v=)|youtu\.be\/)([^"&?\/\s]{11})/i;
/**
 * User agent string for requests
 */
Constants.USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36,gzip(gfe)';
/**
 * Regular expression to parse XML transcript
 * (capture groups: 1 = start, 2 = dur, 3 = text content)
 */
Constants.RE_XML_TRANSCRIPT = /<text start="([^"]*)" dur="([^"]*)">([^<]*)<\/text>/g;

/**
 * Represents a fetched transcript with its snippets and metadata
 */
class FetchedTranscript {
    /**
     * Creates a new FetchedTranscript instance
     *
     * @param snippets - The transcript snippets
     * @param videoId - The YouTube video ID
     * @param language - The language name
     * @param languageCode - The language code
     * @param isGenerated - Whether the transcript was auto-generated
     */
    constructor(snippets, videoId, language, languageCode, isGenerated) {
        this.snippets = snippets;
        this.videoId = videoId;
        this.language = language;
        this.languageCode = languageCode;
        this.isGenerated = isGenerated;
    }
    /**
     * Convert to raw data format
     *
     * @returns One plain object per snippet; the snippet's `start` is exposed
     *          as `offset`, and the transcript-level `lang` / `isGenerated`
     *          are repeated on every entry
     */
    toRawData() {
        return this.snippets.map(snippet => ({
            text: snippet.text,
            duration: snippet.duration,
            offset: snippet.start,
            lang: this.languageCode,
            isGenerated: this.isGenerated
        }));
    }
    /**
     * Get the full transcript text (snippet texts joined by single spaces)
     */
    getText() {
        return this.snippets.map(snippet => snippet.text).join(' ');
    }
    /**
     * Implement iterator protocol by delegating to the snippets array
     */
    [Symbol.iterator]() {
        return this.snippets[Symbol.iterator]();
    }
    /**
     * Get the length of the transcript (number of snippets)
     */
    get length() {
        return this.snippets.length;
    }
}

/**
 * Represents a transcript with metadata
 */
class Transcript {
    /**
     * Creates a new Transcript instance
     *
     * @param videoId - The YouTube video ID
     * @param language - The language name
     * @param languageCode - The language code
     * @param isGenerated - Whether the transcript is auto-generated
     * @param isTranslatable - Whether the transcript can be translated
     * @param translationLanguages - Available translation languages
     * @param baseUrl - The base URL for fetching the transcript
     * @param httpClient - The HTTP client to use for requests
     */
    constructor(videoId, language, languageCode, isGenerated, isTranslatable, translationLanguages, baseUrl, httpClient = null) {
        this.videoId = videoId;
        this.language = language;
        this.languageCode = languageCode;
        this.isGenerated = isGenerated;
        this.isTranslatable = isTranslatable;
        this.translationLanguages = translationLanguages;
        this.baseUrl = baseUrl;
        this.httpClient = httpClient;
    }
    /**
     * Fetch the actual transcript data
     *
     * @param preserveFormatting - Whether to preserve HTML formatting
     * @returns A promise for the FetchedTranscript
     * @throws YoutubeTranscriptNotAvailableError when the response is not OK
     */
    fetch(preserveFormatting = false) {
        return __awaiter(this, void 0, void 0, function* () {
            const requestOptions = {
                headers: {
                    'Accept-Language': this.languageCode,
                    'User-Agent': Constants.USER_AGENT,
                },
            };
            // Go through the injected client when there is one so the cookie
            // header loaded for the video page is also sent with the transcript
            // request; fall back to the global fetch otherwise.
            const transcriptResponse = yield (this.httpClient
                ? this.httpClient.fetch(this.baseUrl, requestOptions)
                : fetch(this.baseUrl, requestOptions));
            if (!transcriptResponse.ok) {
                throw new YoutubeTranscriptNotAvailableError(this.videoId);
            }
            const transcriptBody = yield transcriptResponse.text();
            const results = [...transcriptBody.matchAll(Constants.RE_XML_TRANSCRIPT)];
            const snippets = results.map((result) => ({
                text: preserveFormatting ? result[3] : result[3].replace(/<[^>]*>/g, ''),
                start: parseFloat(result[1]),
                duration: parseFloat(result[2]),
            }));
            return new FetchedTranscript(snippets, this.videoId, this.language, this.languageCode, this.isGenerated);
        });
    }
    /**
     * Translate the transcript to another language
     *
     * @param languageCode - The language code to translate to
     * @returns A new, non-translatable Transcript pointing at the translated track
     * @throws YoutubeTranscriptTranslationError when the transcript is not
     *         translatable or the target language is not offered
     */
    translate(languageCode) {
        if (!this.isTranslatable) {
            throw new YoutubeTranscriptTranslationError('This transcript cannot be translated');
        }
        // A single lookup serves both the availability check and the name.
        const target = this.translationLanguages.find(lang => lang.languageCode === languageCode);
        if (!target) {
            throw new YoutubeTranscriptTranslationError(`This transcript cannot be translated to ${languageCode}. Available languages: ${this.translationLanguages.map(lang => lang.languageCode).join(', ')}`);
        }
        // Construct the translation URL by adding the tlang parameter
        const translationUrl = `${this.baseUrl}&tlang=${languageCode}`;
        // Fall back to the code when the entry carries no display name
        const languageName = target.languageName || languageCode;
        return new Transcript(this.videoId, languageName, languageCode, this.isGenerated, false, // Translated transcripts cannot be translated further
        [], // No translation languages for a translated transcript
        translationUrl, this.httpClient);
    }
}

/**
 * Represents a list of available transcripts for a video
 */
class TranscriptList {
    /**
     * Creates a new TranscriptList instance
     *
     * @param transcripts - The available transcripts
     * @param videoId - The YouTube video ID
     */
    constructor(transcripts, videoId) {
        this.transcripts = transcripts;
        this.videoId = videoId;
    }
    /**
     * Find a transcript in the specified languages
     *
     * @param languageCodes - List of language codes in order of preference
     * @throws YoutubeTranscriptError when none of the languages is available
     */
    findTranscript(languageCodes) {
        for (const languageCode of languageCodes) {
            const transcript = this.transcripts.find(t => t.languageCode === languageCode);
            if (transcript) {
                return transcript;
            }
        }
        throw new YoutubeTranscriptError(`No transcripts found in languages: ${languageCodes.join(', ')} for video ${this.videoId}. ` +
            `Available languages: ${this.transcripts.map(t => t.languageCode).join(', ')}`);
    }
    /**
     * Find a manually created transcript in the specified languages
     *
     * @param languageCodes - List of language codes in order of preference
     * @throws YoutubeTranscriptError when no manually created transcript matches
     */
    findManuallyCreatedTranscript(languageCodes) {
        for (const languageCode of languageCodes) {
            const transcript = this.transcripts.find(t => t.languageCode === languageCode && !t.isGenerated);
            if (transcript) {
                return transcript;
            }
        }
        const availableLanguages = this.transcripts
            .filter(t => !t.isGenerated)
            .map(t => t.languageCode);
        throw new YoutubeTranscriptError(`No manually created transcripts found in languages: ${languageCodes.join(', ')} for video ${this.videoId}. ` +
            `Available languages: ${availableLanguages.join(', ') || 'none'}`);
    }
    /**
     * Find an automatically generated transcript in the specified languages
     *
     * @param languageCodes - List of language codes in order of preference
     * @throws YoutubeTranscriptError when no generated transcript matches
     */
    findGeneratedTranscript(languageCodes) {
        for (const languageCode of languageCodes) {
            const transcript = this.transcripts.find(t => t.languageCode === languageCode && t.isGenerated);
            if (transcript) {
                return transcript;
            }
        }
        const availableLanguages = this.transcripts
            .filter(t => t.isGenerated)
            .map(t => t.languageCode);
        throw new YoutubeTranscriptError(`No automatically generated transcripts found in languages: ${languageCodes.join(', ')} for video ${this.videoId}. ` +
            `Available languages: ${availableLanguages.join(', ') || 'none'}`);
    }
    /**
     * Get all transcripts (as a shallow copy of the internal array)
     */
    getTranscripts() {
        return [...this.transcripts];
    }
    /**
     * Implement iterator protocol by delegating to the transcripts array
     */
    [Symbol.iterator]() {
        return this.transcripts[Symbol.iterator]();
    }
}

/**
 * Helper functions for the YouTube transcript API
 */
class Helpers {
    /**
     * Extract the YouTube video ID from a URL or ID string
     *
     * @param videoId - The YouTube video URL or ID
     * @returns The extracted video ID
     * @throws YoutubeTranscriptError when no ID can be determined
     */
    static extractVideoId(videoId) {
        // Guard first so a missing/non-string argument surfaces as the
        // library's own error type instead of a TypeError on `.length`.
        if (typeof videoId !== 'string') {
            throw new YoutubeTranscriptError('Impossible to retrieve YouTube video ID.');
        }
        // If the input is already a valid video ID (11 characters)
        if (videoId.length === 11) {
            return videoId;
        }
        // Try to extract the ID from a URL
        const matchId = videoId.match(Constants.RE_YOUTUBE);
        if (matchId && matchId.length > 1) {
            return matchId[1];
        }
        throw new YoutubeTranscriptError('Impossible to retrieve YouTube video ID.');
    }
    /**
     * Parse the captions data from YouTube's response
     *
     * @param html - The HTML content of the YouTube page
     * @returns The parsed captions data or undefined if not found
     */
    static parseCaptionsFromHtml(html) {
        const splittedHtml = html.split('"captions":');
        if (splittedHtml.length <= 1) {
            return undefined;
        }
        try {
            // Strip every newline: String.prototype.replace with a string
            // pattern only removes the first occurrence.
            const captionsJson = splittedHtml[1].split(',"videoDetails')[0].replace(/\n/g, '');
            const parsedJson = JSON.parse(captionsJson);
            return parsedJson?.playerCaptionsTracklistRenderer;
        }
        catch (e) {
            // Unparseable captions are reported to the caller as "not found".
            return undefined;
        }
    }
    /**
     * Check if the HTML contains a CAPTCHA challenge
     *
     * @param html - The HTML content to check
     * @returns Whether the HTML contains a CAPTCHA challenge
     */
    static hasCaptchaChallenge(html) {
        return html.includes('class="g-recaptcha"');
    }
    /**
     * Check if the video is available
     *
     * @param html - The HTML content to check
     * @returns Whether the video is available
     */
    static isVideoAvailable(html) {
        return html.includes('"playabilityStatus":');
    }
}

/**
 * HTTP client for making requests to YouTube
 */
class HttpClient {
    /**
     * Creates a new HttpClient instance
     *
     * @param cookiePath - Path to a cookies.txt file
     * @param proxyConfig - Proxy configuration
     */
    constructor(cookiePath, proxyConfig) {
        this.cookiePath = cookiePath;
        this.proxyConfig = proxyConfig;
        if (cookiePath) {
            this.loadCookies(cookiePath);
        }
    }
    /**
     * Hide the user-info part of a proxy URL so credentials never reach logs
     *
     * @param proxyUrl - The proxy URL (may be undefined)
     * @returns The URL with `user:password@` replaced by `***@`
     */
    static redactCredentials(proxyUrl) {
        return typeof proxyUrl === 'string'
            ? proxyUrl.replace(/\/\/[^@\/]*@/, '//***@')
            : proxyUrl;
    }
    /**
     * Load cookies from a Netscape-format cookies.txt file and build the
     * `Cookie` header from the unexpired youtube.com entries. A missing file
     * is ignored; read/parse failures are logged and otherwise ignored.
     *
     * @param cookiePath - Path to the cookies file
     */
    loadCookies(cookiePath) {
        try {
            if (!fs.existsSync(cookiePath)) {
                return;
            }
            const cookieContent = fs.readFileSync(cookiePath, 'utf-8');
            const nowSeconds = Math.floor(Date.now() / 1000);
            const cookies = [];
            // Accept both LF and CRLF files so values do not keep a trailing '\r'
            for (const rawLine of cookieContent.split(/\r?\n/)) {
                let line = rawLine;
                if (line.startsWith('#HttpOnly_')) {
                    // HttpOnly cookies are written with this prefix; they are
                    // real cookie rows, not comments.
                    line = line.slice('#HttpOnly_'.length);
                }
                else if (line.startsWith('#') || line.trim() === '') {
                    // Skip comments and empty lines
                    continue;
                }
                const parts = line.split('\t');
                if (parts.length < 7) {
                    continue;
                }
                const domain = parts[0];
                const expiration = parseInt(parts[4], 10);
                const name = parts[5];
                const value = parts[6];
                // An expiry of 0 marks a session cookie, which has no expiry date
                const isUnexpired = expiration === 0 || expiration > nowSeconds;
                // Only include cookies for youtube.com
                if (domain.includes('youtube.com') && isUnexpired) {
                    cookies.push(`${name}=${value}`);
                }
            }
            if (cookies.length > 0) {
                this.cookieHeader = cookies.join('; ');
            }
        }
        catch (error) {
            console.error('Error loading cookies:', error);
        }
    }
    /**
     * Make a fetch request with the configured options
     *
     * @param url - The URL to fetch
     * @param options - Additional fetch options
     * @returns A promise for the fetch Response
     */
    fetch(url, options = {}) {
        return __awaiter(this, void 0, void 0, function* () {
            const headers = Object.assign({ 'User-Agent': Constants.USER_AGENT }, options.headers);
            if (this.cookieHeader) {
                headers.Cookie = this.cookieHeader;
            }
            const fetchOptions = Object.assign(Object.assign({}, options), { headers });
            if (this.proxyConfig) {
                // NOTE(review): this is still a placeholder - the request below is
                // NOT routed through the proxy; an agent/dispatcher would be needed.
                // The notice goes to stderr (stdout carries the transcript output)
                // and the credentials are redacted.
                const proxyUrl = url.startsWith('https')
                    ? this.proxyConfig.getHttpsProxyUrl()
                    : this.proxyConfig.getHttpProxyUrl();
                console.error('Using proxy:', HttpClient.redactCredentials(proxyUrl));
            }
            return fetch(url, fetchOptions);
        });
    }
}

/**
 * Main class for retrieving YouTube transcripts
 */
class YoutubeTranscript {
    /**
     * Legacy static method for backward compatibility
     *
     * @param videoId - Video URL or video identifier
     * @param config - Configuration options
     * @returns A promise for the transcript in raw data format
     * @deprecated Use the instance method instead
     */
    static fetchTranscript(videoId, config) {
        return __awaiter(this, void 0, void 0, function* () {
            const instance = new YoutubeTranscript();
            const transcript = yield instance.fetch(videoId, config);
            return transcript.toRawData();
        });
    }
    /**
     * Creates a new YoutubeTranscript instance
     *
     * @param cookiePath - Path to a cookies.txt file for authentication
     * @param proxyConfig - Proxy configuration for handling IP bans
     */
    constructor(cookiePath, proxyConfig) {
        this.httpClient = new HttpClient(cookiePath, proxyConfig);
    }
    /**
     * Fetch transcript from a YouTube video
     *
     * @param videoId - Video URL or video identifier
     * @param config - Configuration options
     * @returns A promise for the FetchedTranscript
     */
    fetch(videoId, config) {
        return __awaiter(this, void 0, void 0, function* () {
            const identifier = Helpers.extractVideoId(videoId);
            const languages = this.getLanguagesFromConfig(config);
            // Get the list of available transcripts
            const transcriptList = yield this.list(identifier);
            // Find the transcript in the requested languages
            const transcript = transcriptList.findTranscript(languages);
            // Fetch the transcript data
            return transcript.fetch(config?.preserveFormatting);
        });
    }
    /**
     * List all available transcripts for a video
     *
     * @param videoId - Video URL or video identifier
     * @returns A promise for the TranscriptList
     * @throws YoutubeTranscriptTooManyRequestError when a CAPTCHA page is served
     * @throws YoutubeTranscriptVideoUnavailableError when the page has no playability status
     * @throws YoutubeTranscriptDisabledError when no captions data is found
     * @throws YoutubeTranscriptNotAvailableError when there are no caption tracks
     */
    list(videoId) {
        return __awaiter(this, void 0, void 0, function* () {
            const identifier = Helpers.extractVideoId(videoId);
            // Fetch the video page
            const videoPageResponse = yield this.httpClient.fetch(`https://www.youtube.com/watch?v=${identifier}`);
            const videoPageBody = yield videoPageResponse.text();
            // Check for errors
            if (Helpers.hasCaptchaChallenge(videoPageBody)) {
                throw new YoutubeTranscriptTooManyRequestError();
            }
            if (!Helpers.isVideoAvailable(videoPageBody)) {
                throw new YoutubeTranscriptVideoUnavailableError(identifier);
            }
            // Parse captions data
            const captions = Helpers.parseCaptionsFromHtml(videoPageBody);
            if (!captions) {
                throw new YoutubeTranscriptDisabledError(identifier);
            }
            if (!('captionTracks' in captions)) {
                throw new YoutubeTranscriptNotAvailableError(identifier);
            }
            // Parse global translation languages (if available)
            let globalTranslationLanguages = [];
            if (captions.translationLanguages) {
                globalTranslationLanguages = captions.translationLanguages.map((lang) => ({
                    languageCode: lang.languageCode,
                    languageName: lang.languageName.simpleText
                }));
            }
            // Create transcript objects for each caption track
            const transcripts = captions.captionTracks.map((track) => {
                // Use track-specific translation languages if available, otherwise use global ones
                const translationLanguages = track.translationLanguages
                    ? track.translationLanguages.map(lang => ({
                        languageCode: lang.languageCode,
                        languageName: lang.languageName.simpleText
                    }))
                    : globalTranslationLanguages;
                return new Transcript(identifier, track.name.simpleText, track.languageCode, track.kind === 'asr', // 'asr' means auto-generated
                !!track.isTranslatable, translationLanguages, track.baseUrl, this.httpClient);
            });
            return new TranscriptList(transcripts, identifier);
        });
    }
    /**
     * Get languages from config, handling both new and legacy formats
     *
     * @param config - The transcript configuration
     * @returns `config.languages` when non-empty, else `[config.lang]`, else `['en']`
     */
    getLanguagesFromConfig(config) {
        if (!config) {
            return ['en']; // Default to English
        }
        if (config.languages && config.languages.length > 0) {
            return config.languages;
        }
        if (config.lang) {
            return [config.lang];
        }
        return ['en'];
    }
}

/**
 * Formats transcripts as JSON
 */
class JSONFormatter {
    /**
     * Format a single transcript as JSON
     *
     * @param transcript - The transcript to format
     * @param options - JSON.stringify options
     */
    formatTranscript(transcript, options) {
        return JSON.stringify(transcript.toRawData(), null, options?.indent);
    }
    /**
     * Format multiple transcripts as JSON
     *
     * @param transcripts - The transcripts to format
     * @param options - JSON.stringify options
     */
    formatTranscripts(transcripts, options) {
        return JSON.stringify(transcripts.map(transcript => ({
            videoId: transcript.videoId,
            language: transcript.language,
            languageCode: transcript.languageCode,
            isGenerated: transcript.isGenerated,
            transcript: transcript.toRawData()
        })), null, options?.indent);
    }
}

/**
 * Formats transcripts as plain text
 */
class TextFormatter {
    /**
     * Format a single transcript as plain text
     *
     * @param transcript - The transcript to format
     */
    formatTranscript(transcript) {
        return transcript.snippets
            .map(snippet => snippet.text)
            .join(' ');
    }
    /**
     * Format multiple transcripts as plain text
     *
     * @param transcripts - The transcripts to format
     */
    formatTranscripts(transcripts) {
        return transcripts
            .map(transcript => {
            return `[${transcript.videoId} - ${transcript.language}]\n${this.formatTranscript(transcript)}`;
        })
            .join('\n\n');
    }
}

/**
 * Formats transcripts in SRT (SubRip) format
 */
class SRTFormatter {
    /**
     * Format a single transcript in SRT format
     *
     * @param transcript - The transcript to format
     */
    formatTranscript(transcript) {
        return transcript.snippets
            .map((snippet, index) => {
            const startTime = this.formatTime(snippet.start);
            const endTime = this.formatTime(snippet.start + snippet.duration);
            return `${index + 1}\n${startTime} --> ${endTime}\n${snippet.text}\n`;
        })
            .join('\n');
    }
    /**
     * Format multiple transcripts in SRT format
     *
     * @param transcripts - The transcripts to format
     */
    formatTranscripts(transcripts) {
        // NOTE(review): the "WEBVTT" separator is a WebVTT header, not part of
        // SRT; kept unchanged because existing consumers may rely on this output.
        return transcripts
            .map(transcript => {
            return `WEBVTT - ${transcript.videoId} (${transcript.language})\n\n${this.formatTranscript(transcript)}`;
        })
            .join('\n\n');
    }
    /**
     * Format a time value in SRT format (HH:MM:SS,mmm)
     *
     * Computed arithmetically rather than through Date so the hour field keeps
     * counting past 23 instead of wrapping back to 00 at 24 hours.
     *
     * @param seconds - The time in seconds
     */
    formatTime(seconds) {
        const totalMs = Math.max(0, Math.trunc(seconds * 1000));
        const totalSeconds = Math.trunc(totalMs / 1000);
        const pad = (value, width) => String(value).padStart(width, '0');
        const hours = pad(Math.trunc(totalSeconds / 3600), 2);
        const minutes = pad(Math.trunc(totalSeconds / 60) % 60, 2);
        const secs = pad(totalSeconds % 60, 2);
        const ms = pad(totalMs % 1000, 3);
        return `${hours}:${minutes}:${secs},${ms}`;
    }
}

/**
 * Generic proxy configuration
 */
class GenericProxyConfig {
    /**
     * Creates a new GenericProxyConfig instance
     *
     * @param httpUrl - The HTTP proxy URL
     * @param httpsUrl - The HTTPS proxy URL
     */
    constructor(httpUrl, httpsUrl) {
        this.httpUrl = httpUrl;
        this.httpsUrl = httpsUrl;
    }
    /**
     * Get the HTTP proxy URL
     */
    getHttpProxyUrl() {
        return this.httpUrl;
    }
    /**
     * Get the HTTPS proxy URL
     */
    getHttpsProxyUrl() {
        return this.httpsUrl;
    }
}

/**
 * Configuration for Webshare proxies
 */
class WebshareProxyConfig {
    /**
     * Creates a new WebshareProxyConfig instance
     *
     * @param proxyUsername - The Webshare proxy username
     * @param proxyPassword - The Webshare proxy password
     */
    constructor(proxyUsername, proxyPassword) {
        this.proxyUsername = proxyUsername;
        this.proxyPassword = proxyPassword;
    }
    /**
     * Get the HTTP proxy URL
     */
    getHttpProxyUrl() {
        return `http://${this.proxyUsername}:${this.proxyPassword}@${WebshareProxyConfig.WEBSHARE_PROXY_HOST}:${WebshareProxyConfig.WEBSHARE_PROXY_PORT}`;
    }
    /**
     * Get the HTTPS proxy URL
     */
    getHttpsProxyUrl() {
        return `http://${this.proxyUsername}:${this.proxyPassword}@${WebshareProxyConfig.WEBSHARE_PROXY_HOST}:${WebshareProxyConfig.WEBSHARE_PROXY_HTTPS_PORT}`;
    }
}
WebshareProxyConfig.WEBSHARE_PROXY_HOST = 'p.webshare.io';
WebshareProxyConfig.WEBSHARE_PROXY_PORT = '80';
WebshareProxyConfig.WEBSHARE_PROXY_HTTPS_PORT = '443';

/**
 * CLI implementation for ai-youtube-transcript
 */
// Parse command line arguments
const args = process.argv.slice(2);
const videoIds = [];
let languages = ['en'];
let format = 'text';
let outputFile = null;
let translateTo = null;
let listTranscripts = false;
let excludeGenerated = false;
let excludeManuallyCreated = false;
let preserveFormatting = false;
let cookiePath = null;
let httpProxy = null;
let httpsProxy = null;
let webshareUsername = null;
let websharePassword = null;

/**
 * Read the value that must follow a command line option. Prints an error and
 * exits with status 1 when the option is the last argument, instead of letting
 * `undefined` propagate into later code.
 *
 * @param argv - The argument list
 * @param index - Index at which the value is expected
 * @param option - The option name, used in the error message
 * @returns The option value
 */
function readOptionValue(argv, index, option) {
    const value = argv[index];
    if (value === undefined) {
        console.error(`Error: Option ${option} requires a value`);
        process.exit(1);
    }
    return value;
}

// Parse arguments
for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    if (arg === '--languages' || arg === '-l') {
        // Ignore blanks such as "en, ,fr" or a trailing comma; keep the
        // default when nothing usable remains.
        const requested = readOptionValue(args, ++i, arg)
            .split(',')
            .map(code => code.trim())
            .filter(code => code !== '');
        if (requested.length > 0) {
            languages = requested;
        }
    }
    else if (arg === '--format' || arg === '-f') {
        format = readOptionValue(args, ++i, arg);
    }
    else if (arg === '--output' || arg === '-o') {
        outputFile = readOptionValue(args, ++i, arg);
    }
    else if (arg === '--translate' || arg === '-t') {
        translateTo = readOptionValue(args, ++i, arg);
    }
    else if (arg === '--list-transcripts') {
        listTranscripts = true;
    }
    else if (arg === '--exclude-generated') {
        excludeGenerated = true;
    }
    else if (arg === '--exclude-manually-created') {
        excludeManuallyCreated = true;
    }
    else if (arg === '--preserve-formatting') {
        preserveFormatting = true;
    }
    else if (arg === '--cookies') {
        cookiePath = readOptionValue(args, ++i, arg);
    }
    else if (arg === '--http-proxy') {
        httpProxy = readOptionValue(args, ++i, arg);
    }
    else if (arg === '--https-proxy') {
        httpsProxy = readOptionValue(args, ++i, arg);
    }
    else if (arg === '--webshare-proxy-username') {
        webshareUsername = readOptionValue(args, ++i, arg);
    }
    else if (arg === '--webshare-proxy-password') {
        websharePassword = readOptionValue(args, ++i, arg);
    }
    else if (arg === '--help' || arg === '-h') {
        printHelp();
        process.exit(0);
    }
    else if (!arg.startsWith('--')) {
        // Anything that is not a long option is taken as a video ID or URL
        videoIds.push(arg);
    }
    else {
        // Still ignored, but no longer silently
        console.error(`Warning: Ignoring unknown option ${arg}`);
    }
}

// Configure proxy if needed
let proxyConfig = null;
if (webshareUsername && websharePassword) {
    proxyConfig = new WebshareProxyConfig(webshareUsername, websharePassword);
}
else if (httpProxy || httpsProxy) {
    proxyConfig = new GenericProxyConfig(httpProxy || undefined, httpsProxy || undefined);
}

// Create YouTube transcript instance
const ytTranscript = new YoutubeTranscript(cookiePath || undefined, proxyConfig || undefined);

// Create formatter based on format (unrecognised formats fall back to text)
let formatter;
switch (format.toLowerCase()) {
    case 'json':
        formatter = new JSONFormatter();
        break;
    case 'srt':
        formatter = new SRTFormatter();
        break;
    case 'text':
    default:
        formatter = new TextFormatter();
        break;
}

/**
 * CLI entry point: lists transcripts for the first video ID when
 * --list-transcripts is set, otherwise fetches transcripts for every video ID.
 * Any error is printed to stderr and the process exits with status 1.
 */
function main() {
    return __awaiter(this, void 0, void 0, function* () {
        try {
            if (listTranscripts) {
                if (videoIds.length === 0) {
                    console.error('Error: Please provide a video ID to list transcripts');
                    process.exit(1);
                }
                yield listAvailableTranscripts(videoIds[0]);
            }
            else {
                if (videoIds.length === 0) {
                    console.error('Error: Please provide at least one video ID');
                    process.exit(1);
                }
                yield fetchTranscripts(videoIds);
            }
        }
        catch (error) {
            console.error(`Error: ${error.message}`);
            process.exit(1);
        }
    });
}

/**
 * Print every available transcript of a video, with its translation targets
 *
 * @param videoId - Video URL or video identifier
 */
function listAvailableTranscripts(videoId) {
    return __awaiter(this, void 0, void 0, function* () {
        const transcriptList = yield ytTranscript.list(videoId);
        console.log(`Available transcripts for video ${videoId}:`);
        console.log('---------------------------------------------');
        for (const transcript of transcriptList) {
            console.log(`Language: ${transcript.language} (${transcript.languageCode})`);
            console.log(`Auto-generated: ${transcript.isGenerated ? 'Yes' : 'No'}`);
            console.log(`Translatable: ${transcript.isTranslatable ? 'Yes' : 'No'}`);
            if (transcript.isTranslatable && transcript.translationLanguages.length > 0) {
                console.log('Available translations:');
                for (const lang of transcript.translationLanguages) {
                    console.log(` - ${lang.languageName} (${lang.languageCode})`);
                }
            }
            console.log('---------------------------------------------');
        }
    });
}

/**
 * Fetch, optionally translate, format and output the transcripts of all given
 * videos. A failure for one video is reported on stderr and does not stop the
 * others; the process exits with status 1 only when nothing could be fetched.
 *
 * @param videoIds - Video URLs or video identifiers
 */
function fetchTranscripts(videoIds) {
    return __awaiter(this, void 0, void 0, function* () {
        const results = [];
        for (const videoId of videoIds) {
            try {
                let transcript;
                // Get the list of available transcripts
                const transcriptList = yield ytTranscript.list(videoId);
                // Find the appropriate transcript based on options
                if (excludeGenerated) {
                    transcript = transcriptList.findManuallyCreatedTranscript(languages);
                }
                else if (excludeManuallyCreated) {
                    transcript = transcriptList.findGeneratedTranscript(languages);
                }
                else {
                    transcript = transcriptList.findTranscript(languages);
                }
                // Translate if requested
                if (translateTo) {
                    transcript = transcript.translate(translateTo);
                }
                // Fetch the transcript data
                const fetchedTranscript = yield transcript.fetch(preserveFormatting);
                results.push(fetchedTranscript);
            }
            catch (error) {
                console.error(`Error fetching transcript for video ${videoId}: ${error.message}`);
            }
        }
        if (results.length === 0) {
            console.error('No transcripts were successfully fetched');
            process.exit(1);
        }
        // Format the results
        const formattedOutput = results.length === 1
            ? formatter.formatTranscript(results[0], { indent: 2 })
            : formatter.formatTranscripts(results, { indent: 2 });
        // Output the results
        if (outputFile) {
            fs.writeFileSync(outputFile, formattedOutput);
            console.log(`Transcripts written to ${outputFile}`);
        }
        else {
            console.log(formattedOutput);
        }
    });
}

/**
 * Print help information
 */
function printHelp() {
    console.log(`
AI YouTube Transcript CLI

Usage:
  ai-youtube-transcript <videoId> [options]
  ai-youtube-transcript --list-transcripts <videoId>

Options:
  --languages, -l <langs>        Comma-separated list of language codes in order of preference (default: en)
  --format, -f <format>          Output format: text, json, srt (default: text)
  --output, -o <file>            Write output to a file instead of stdout
  --translate, -t <lang>         Translate transcript to the specified language
  --list-transcripts             List all available transcripts for the video
  --exclude-generated            Only use manually created transcripts
  --exclude-manually-created     Only use automatically generated transcripts
  --preserve-formatting          Preserve HTML formatting in the transcript
  --cookies <path>               Path to cookies.txt file for authentication
  --http-proxy <url>             HTTP proxy URL
  --https-proxy <url>            HTTPS proxy URL
  --webshare-proxy-username <u>  Webshare proxy username
  --webshare-proxy-password <p>  Webshare proxy password
  --help, -h                     Show this help message

Examples:
  ai-youtube-transcript dQw4w9WgXcQ
  ai-youtube-transcript dQw4w9WgXcQ --languages fr,en,es
  ai-youtube-transcript dQw4w9WgXcQ --format json --output transcript.json
  ai-youtube-transcript dQw4w9WgXcQ --translate de
  ai-youtube-transcript --list-transcripts dQw4w9WgXcQ
`);
}

// Run the main function
main();