UNPKG

@opensubtitles/video-metadata-extractor

Version:

A comprehensive NPM package for video metadata extraction and subtitle processing using FFmpeg WASM. Supports metadata extraction, individual subtitle extraction, batch subtitle extraction with ZIP downloads, and memory-safe processing of files of any size.

opensubtitles.github.io/video-metadata-extractor-js/

opensubtitles/video-metadata-extractor-js

941 lines • 73.7 kB

JavaScript

import { useState, useCallback, useEffect, useRef } from 'react'; import { FFmpeg } from '@ffmpeg/ffmpeg'; import { fetchFile } from '@ffmpeg/util'; import { cachedToBlobURL } from '../utils/ffmpegCache.js'; import JSZip from 'jszip'; // Helper function to get file format from filename const getFormatFromFileName = (filename) => { const extension = filename.split('.').pop()?.toLowerCase(); return extension || 'unknown'; }; // Helper function to generate subtitle filename based on movie name const generateSubtitleFilename = (movieFilename, language, isForced, codecName) => { // Remove extension from movie filename const nameWithoutExt = movieFilename.replace(/\.[^/.]+$/, ''); // Map language codes to standard 2-letter codes const languageMap = { 'eng': 'en', 'english': 'en', 'spa': 'es', 'spanish': 'es', 'fre': 'fr', 'french': 'fr', 'ger': 'de', 'german': 'de', 'ita': 'it', 'italian': 'it', 'por': 'pt', 'portuguese': 'pt', 'rus': 'ru', 'russian': 'ru', 'jpn': 'ja', 'japanese': 'ja', 'chi': 'zh', 'chinese': 'zh', 'kor': 'ko', 'korean': 'ko', 'ara': 'ar', 'arabic': 'ar', 'dut': 'nl', 'dutch': 'nl', 'swe': 'sv', 'swedish': 'sv', 'nor': 'no', 'norwegian': 'no', 'dan': 'da', 'danish': 'da', 'fin': 'fi', 'finnish': 'fi', 'pol': 'pl', 'polish': 'pl', 'cze': 'cs', 'czech': 'cs', 'hun': 'hu', 'hungarian': 'hu', 'gre': 'el', 'greek': 'el', 'tur': 'tr', 'turkish': 'tr', 'heb': 'he', 'hebrew': 'he', 'tha': 'th', 'thai': 'th', 'vie': 'vi', 'vietnamese': 'vi', 'hin': 'hi', 'hindi': 'hi', 'und': 'unknown', 'unknown': 'unknown' }; // Get standardized language code const langCode = language ? 
(languageMap[language.toLowerCase()] || language.toLowerCase()) : 'unknown'; // Determine file extension based on codec let extension = 'srt'; // Default to SRT if (codecName) { const codec = codecName.toLowerCase(); if (codec.includes('ass') || codec.includes('ssa')) { extension = 'ass'; } else if (codec.includes('vtt') || codec.includes('webvtt')) { extension = 'vtt'; } else if (codec.includes('srt') || codec.includes('subrip')) { extension = 'srt'; } else if (codec.includes('dvd') || codec.includes('vobsub')) { extension = 'srt'; // Convert DVD subtitles to SRT } } // Build filename: MovieName.lang[.forced].ext let filename = `${nameWithoutExt}.${langCode}`; if (isForced) { filename += '.forced'; } filename += `.${extension}`; return { filename, extension }; }; // Helper function to safely decode data for preview const safeDecodePreview = (data, maxLength = 200) => { if (typeof data === 'string') { return data.slice(0, maxLength); } const previewData = data.slice(0, Math.min(maxLength, data.length)); return new TextDecoder().decode(previewData); }; // Helper function to create complete file data using streaming chunks for 100% subtitle extraction // Uses unified chunked strategy for all file sizes to keep implementation simple and consistent const createCompleteFileDataInChunks = async (file) => { const fileSize = file.size; const fileSizeMB = Math.round(fileSize / 1024 / 1024); console.log(`[COMPLETE CHUNKED READING] Processing entire file (${fileSizeMB}MB) using unified chunked strategy`); // Use 500MB chunks for all files to maintain consistent memory usage and simplify logic const chunkSize = 500 * 1024 * 1024; // 500MB chunks const chunks = []; const totalChunks = Math.ceil(fileSize / chunkSize); console.log(`[COMPLETE CHUNKED READING] Creating ${totalChunks} chunks of up to 500MB each`); for (let offset = 0; offset < fileSize; offset += chunkSize) { const end = Math.min(offset + chunkSize, fileSize); const chunk = file.slice(offset, end); 
chunks.push(chunk); const chunkNumber = Math.floor(offset / chunkSize) + 1; const progress = Math.round((chunkNumber / totalChunks) * 100); console.log(`[COMPLETE CHUNKED READING] Created chunk ${chunkNumber}/${totalChunks} (${Math.round((end - offset) / 1024 / 1024)}MB) - ${progress}% complete`); // Add small delay to prevent UI blocking during chunking for large files if (chunkNumber % 5 === 0) { await new Promise(resolve => setTimeout(resolve, 10)); } } // Combine all chunks into a single blob representing the complete file const completeFile = new Blob(chunks, { type: file.type }); console.log(`[COMPLETE CHUNKED READING] Combined ${chunks.length} chunks into complete file blob (${Math.round(completeFile.size / 1024 / 1024)}MB)`); console.log(`[COMPLETE CHUNKED READING] Unified chunked strategy ensures consistent performance for any file size`); return completeFile; }; // Helper function for progressive chunk extraction for massive files const extractSubtitleFromMultipleChunks = async (ffmpeg, file, streamIndex, outputFormat, outputFilename, progressCallback) => { const fileSize = file.size; const chunkSize = 64 * 1024 * 1024; // 64MB chunks for progressive extraction const maxChunks = 10; // Limit to 10 chunks max to prevent memory issues console.log(`[PROGRESSIVE EXTRACTION] Starting progressive extraction for ${(fileSize / 1024 / 1024 / 1024).toFixed(1)}GB file`); // Calculate strategic chunk positions const totalChunks = Math.min(maxChunks, Math.ceil(fileSize / (500 * 1024 * 1024))); // One chunk per 500MB const chunkPositions = []; for (let i = 0; i < totalChunks; i++) { const position = Math.floor((fileSize / totalChunks) * i); chunkPositions.push(position); } console.log(`[PROGRESSIVE EXTRACTION] Will try ${chunkPositions.length} chunk positions:`, chunkPositions.map(p => `${(p / 1024 / 1024 / 1024).toFixed(1)}GB`)); // Try each chunk until we find subtitles for (let i = 0; i < chunkPositions.length; i++) { const position = chunkPositions[i]; const 
chunkEnd = Math.min(position + chunkSize, fileSize); const chunk = file.slice(position, chunkEnd); progressCallback(20 + (i / chunkPositions.length) * 60, `Trying chunk ${i + 1}/${chunkPositions.length} at ${(position / 1024 / 1024 / 1024).toFixed(1)}GB...`); try { console.log(`[PROGRESSIVE EXTRACTION] Trying chunk ${i + 1} at position ${(position / 1024 / 1024 / 1024).toFixed(1)}GB`); // Clean up previous chunk try { await ffmpeg.deleteFile('input.video'); } catch (cleanupError) { // Continue } // Load chunk await ffmpeg.writeFile('input.video', await fetchFile(chunk)); // Try extraction const chunkFilename = `chunk_${i}_${outputFilename}`; await ffmpeg.exec([ '-i', 'input.video', '-map', `0:${streamIndex}`, '-c:s', outputFormat, chunkFilename ]); // Check if we got any subtitle data const subtitleData = await ffmpeg.readFile(chunkFilename); if (subtitleData.length > 0) { console.log(`[PROGRESSIVE EXTRACTION] Found subtitles in chunk ${i + 1}! Size: ${subtitleData.length} bytes`); // Clean up chunk file try { await ffmpeg.deleteFile(chunkFilename); } catch (cleanupError) { // Continue } return subtitleData; } // Clean up chunk file try { await ffmpeg.deleteFile(chunkFilename); } catch (cleanupError) { // Continue } } catch (chunkError) { console.log(`[PROGRESSIVE EXTRACTION] Chunk ${i + 1} failed:`, chunkError); // Continue to next chunk } } console.log(`[PROGRESSIVE EXTRACTION] No subtitles found in any of the ${chunkPositions.length} chunks`); return null; }; // Helper function to handle large file downloads (> 2GB blob limit) const downloadLargeFile = (data, filename, progressCallback) => { const CHUNK_SIZE = 100 * 1024 * 1024; // 100MB chunks const MAX_BLOB_SIZE = 2 * 1024 * 1024 * 1024; // 2GB limit console.log(`[Large File Download] Starting download for ${filename}, size: ${data.length} bytes`); // If file is smaller than 2GB, use normal blob download if (data.length < MAX_BLOB_SIZE) { const blob = new Blob([data], { type: 'application/octet-stream' }); 
const url = URL.createObjectURL(blob); const a = document.createElement('a'); a.href = url; a.download = filename; document.body.appendChild(a); a.click(); document.body.removeChild(a); URL.revokeObjectURL(url); console.log(`[Large File Download] Normal blob download completed for ${filename}`); return; } // For files > 2GB, use chunked download via streams console.log(`[Large File Download] Using chunked download for ${filename} (${data.length} bytes)`); // Create a download stream const stream = new ReadableStream({ start(controller) { let offset = 0; const pump = () => { if (offset < data.length) { const chunk = data.slice(offset, Math.min(offset + CHUNK_SIZE, data.length)); controller.enqueue(chunk); offset += chunk.length; // Report progress if (progressCallback) { progressCallback((offset / data.length) * 100); } // Continue with next chunk setTimeout(pump, 0); } else { controller.close(); } }; pump(); } }); // Create response from stream and trigger download const response = new Response(stream); response.blob().then(blob => { const url = URL.createObjectURL(blob); const a = document.createElement('a'); a.href = url; a.download = filename; document.body.appendChild(a); a.click(); document.body.removeChild(a); URL.revokeObjectURL(url); console.log(`[Large File Download] Chunked download completed for ${filename}`); }).catch(error => { console.error(`[Large File Download] Chunked download failed:`, error); throw error; }); }; export const useVideoMetadata = () => { const ffmpegRef = useRef(new FFmpeg()); const [isLoaded, setIsLoaded] = useState(false); const [metadata, setMetadata] = useState(null); const [progress, setProgress] = useState({ isVisible: false, progress: 0, text: '' }); const [error, setError] = useState({ isVisible: false, message: '' }); const showProgress = useCallback((text, progressPercent = 0) => { setProgress({ isVisible: true, progress: progressPercent, text }); }, []); const hideProgress = useCallback(() => { setProgress({ isVisible: 
false, progress: 0, text: '' }); }, []); const showError = useCallback((message) => { setError({ isVisible: true, message }); }, []); const hideError = useCallback(() => { setError({ isVisible: false, message: '' }); }, []); const processMP4File = useCallback((file) => { return file; }, []); // Memory-efficient FFmpeg cleanup and reuse const cleanupFFmpegFiles = useCallback(async () => { if (!ffmpegRef.current) return; try { // Add delay before cleanup to ensure any pending operations complete await new Promise(resolve => setTimeout(resolve, 500)); // Clean up known temporary files with retry logic const tempFiles = ['input.video', 'output.video', 'subtitle.srt', 'subtitle.ass', 'subtitle.vtt']; for (const fileName of tempFiles) { let retries = 3; while (retries > 0) { try { await ffmpegRef.current.deleteFile(fileName); break; } catch (err) { retries--; if (retries > 0) { await new Promise(resolve => setTimeout(resolve, 100)); } } } } // Add delay before listing directory await new Promise(resolve => setTimeout(resolve, 200)); // List and clean up any remaining files try { const files = await ffmpegRef.current.listDir('/'); const systemDirs = new Set(['.', '..', 'tmp', 'home', 'dev', 'proc', 'usr', 'bin', 'etc', 'var', 'lib']); for (const fileInfo of files) { const fileName = typeof fileInfo === 'string' ? 
fileInfo : fileInfo.name; const isDir = typeof fileInfo === 'object' && fileInfo.isDir; // Skip system directories if (!systemDirs.has(fileName) && !isDir) { let retries = 3; while (retries > 0) { try { await ffmpegRef.current.deleteFile(fileName); break; } catch (err) { retries--; if (retries > 0) { await new Promise(resolve => setTimeout(resolve, 100)); } } } } } } catch (listError) { // Continue on error } // Force garbage collection if available if (typeof window !== 'undefined' && window.gc) { window.gc(); } // Final delay to ensure cleanup is complete await new Promise(resolve => setTimeout(resolve, 300)); } catch (cleanupError) { // Continue on error } }, []); const extractMetadata = useCallback(async (file) => { if (!isLoaded) return; try { console.log(`[FFMPEG DEBUG] extractMetadata called for: ${file.name}`); showProgress('Processing video...'); // Clean up any existing files before starting console.log(`[FFMPEG DEBUG] Starting cleanup before processing`); await cleanupFFmpegFiles(); console.log(`[FFMPEG DEBUG] Cleanup completed`); // Add additional delay after cleanup to ensure virtual filesystem is ready await new Promise(resolve => setTimeout(resolve, 500)); // Validate file size (prevent extremely large files that could cause memory issues) const fileSize = file.size; if (fileSize === 0) { throw new Error(`File "${file.name}" appears to be empty`); } // Validate file size (prevent extremely large files that could cause memory issues) // Note: Smart chunking now handles large files efficiently // Check if file has a valid extension based on FFmpeg demuxers const validExtensions = [ // Common video formats 'mp4', 'm4v', 'mov', '3gp', '3g2', 'mj2', // QuickTime/MOV family 'avi', // AVI 'mkv', 'webm', // Matroska/WebM 'flv', // Flash Video 'asf', 'wmv', // Windows Media 'mpg', 'mpeg', 'ts', 'm2ts', // MPEG formats 'ogv', 'ogg', // Ogg 'gif', // GIF 'swf', // SWF 'rm', 'rmvb', // RealMedia 'dv', // DV 'mxf', // MXF 'nut', // NUT 'nuv', // NuppelVideo 
'roq', // id RoQ 'nsv', // Nullsoft Streaming Video 'wtv', // Windows Television 'ty', // TiVo 'pva', // TechnoTrend PVA 'ivf', // On2 IVF 'yuv', // YUV4MPEG 'r3d', // REDCODE R3D // Audio formats that may contain video 'aac', 'mp3', 'flac', 'wav', 'wv', 'ape', 'mpc', 'tta', 'tak', 'au', 'caf', 'w64', 'voc', 'aiff', 'gsm', 'amr', 'ac3', 'eac3', 'dts', 'dtshd', 'truehd', 'mlp', 'opus', 'vorbis', 'spx' ]; const extension = file.name.split('.').pop()?.toLowerCase(); if (!extension || !validExtensions.includes(extension)) { throw new Error(`Unsupported file format: ${extension || 'unknown'}. Supported formats include: mp4, avi, mov, mkv, webm, flv, 3gp, wmv, mpg, ogg, and many others.`); } // Smart chunked reading strategy for metadata extraction let fileData; // Use unified chunked strategy for all file types to ensure consistent performance console.log(`[FULL EXTRACTION DEBUG] Using unified chunked strategy for ${extension?.toUpperCase() || 'unknown'} file`); fileData = await createCompleteFileDataInChunks(file); console.log(`[FULL EXTRACTION DEBUG] Complete file processing ready, size: ${fileData.size} bytes`); showProgress(`Processing complete file (${Math.round(fileData.size / 1024 / 1024)}MB) with chunked strategy...`, 20); // Write file to FFmpeg virtual filesystem showProgress('Loading file into FFmpeg...'); // Retry file write operation with exponential backoff let writeAttempts = 0; const maxWriteAttempts = 3; let writeSuccess = false; while (writeAttempts < maxWriteAttempts && !writeSuccess) { try { writeAttempts++; // Ensure we have a clean state before writing try { await ffmpegRef.current.deleteFile('input.video'); } catch (cleanupError) { // Continue } // Add delay between retry attempts if (writeAttempts > 1) { const delay = Math.pow(2, writeAttempts - 1) * 100; // Exponential backoff await new Promise(resolve => setTimeout(resolve, delay)); } // Prepare file data const fileBuffer = await fetchFile(fileData); // Add timeout to prevent hanging const 
writePromise = ffmpegRef.current.writeFile('input.video', fileBuffer); const timeoutPromise = new Promise((_, reject) => { setTimeout(() => reject(new Error('File write timeout')), 30000); // 30 second timeout }); await Promise.race([writePromise, timeoutPromise]); // File write completed successfully writeSuccess = true; } catch (writeError) { // Clean up any partial state try { await ffmpegRef.current.deleteFile('input.video'); } catch (cleanupError) { // Continue } // If this was the last attempt, throw the error if (writeAttempts >= maxWriteAttempts) { throw new Error(`Failed to load file into FFmpeg after ${maxWriteAttempts} attempts: ${writeError instanceof Error ? writeError.message : 'Unknown error'}`); } } } // Capture FFmpeg log output to parse metadata let ffmpegLogs = []; const logHandler = ({ message }) => { ffmpegLogs.push(message); }; // Add temporary log handler ffmpegRef.current.on('log', logHandler); // Use simple -i command to get metadata info showProgress('Extracting metadata...'); try { // Add timeout to prevent hanging const execPromise = ffmpegRef.current.exec(['-i', 'input.video']); const timeoutPromise = new Promise((_, reject) => { setTimeout(() => reject(new Error('FFmpeg execution timeout')), 60000); // 60 second timeout }); await Promise.race([execPromise, timeoutPromise]); } catch (ffmpegError) { // Expected error for info extraction - FFmpeg always "fails" with -i command } // Remove log handler ffmpegRef.current.off('log', logHandler); // Check if we got any useful log output if (ffmpegLogs.length === 0) { throw new Error('FFmpeg did not produce any output. 
The file might be corrupted or unsupported.'); } // Parse the FFmpeg log output to extract metadata const logOutput = ffmpegLogs.join('\n'); // Check for specific error patterns in FFmpeg output if (logOutput.includes('Invalid data found when processing input') || logOutput.includes('No such file or directory') || logOutput.includes('Operation not permitted')) { // Check if this is a virtual filesystem issue if (logOutput.includes('No such file or directory')) { throw new Error(`Failed to load file "${file.name}" into FFmpeg virtual filesystem. This may be due to memory constraints or file corruption.`); } throw new Error(`File "${file.name}" appears to be corrupted or not a valid media file`); } if (logOutput.includes('Decoder (codec ') && logOutput.includes('not found')) { throw new Error('Media file uses an unsupported codec. Try converting to a standard format like MP4.'); } // Check if we found any stream information if (!logOutput.includes('Stream #')) { throw new Error('No audio or video streams found in the file. The file might be corrupted or encrypted.'); } // Parse video stream info first (needed for FPS calculation) const videoStreamMatch = logOutput.match(/Stream.*Video: ([^,]+)[^,]*,.*?(\d+x\d+)[^,]*,.*?(\d+\.?\d*) fps/); const videoCodec = videoStreamMatch ? videoStreamMatch[1] : 'unknown'; const resolution = videoStreamMatch ? videoStreamMatch[2] : 'unknown'; const fps = videoStreamMatch ? 
parseFloat(videoStreamMatch[3]) : 25; // Parse duration from log - convert to total seconds for the component const durationMatch = logOutput.match(/Duration: (\d{2}):(\d{2}):(\d{2})\.(\d{2})/); let duration = 'unknown'; let movietimems = 'unknown'; let movieframes = 'unknown'; if (durationMatch) { const hours = parseInt(durationMatch[1]); const minutes = parseInt(durationMatch[2]); const seconds = parseInt(durationMatch[3]); const centiseconds = parseInt(durationMatch[4]); const totalSeconds = hours * 3600 + minutes * 60 + seconds; const totalMilliseconds = totalSeconds * 1000 + centiseconds * 10; duration = totalSeconds.toString(); // Send as total seconds string movietimems = totalMilliseconds.toString(); // Movie time in milliseconds // Calculate total frames based on FPS const totalFrames = Math.round(totalMilliseconds / 1000 * fps); movieframes = totalFrames.toString(); } // Parse bitrate from log - convert to number (bits per second) const bitrateMatch = logOutput.match(/bitrate: (\d+) kb\/s/); const bitrate = bitrateMatch ? (parseInt(bitrateMatch[1]) * 1000).toString() : 'unknown'; // Parse audio stream info - updated regex to match the actual format const audioStreamMatch = logOutput.match(/Stream.*Audio: ([^,]+)[^,]*,.*?(\d+) Hz/); const audioCodec = audioStreamMatch ? audioStreamMatch[1] : 'unknown'; const sampleRate = audioStreamMatch ? audioStreamMatch[2] : '48000'; // Parse audio bitrate from the BPS metadata instead const audioBpsMatch = logOutput.match(/Stream #0:1.*\n(?:.*\n)*?.*BPS\s*:\s*(\d+)/); const audioBitrate = audioBpsMatch ? 
Math.round(parseInt(audioBpsMatch[1]) / 1000).toString() : '128'; // Parse subtitle streams from FFmpeg output const subtitleStreams = []; // Match subtitle streams with pattern: Stream #0:2(eng): Subtitle: subrip (default) const subtitleMatches = logOutput.match(/Stream #\d+:\d+(?:\([^)]*\))?: Subtitle: ([^(\n]+)(?:\([^)]*\))?[^\n]*(?:\n[^\n]*)*?(?:BPS\s*:\s*(\d+))?/g); if (subtitleMatches) { subtitleMatches.forEach(match => { // Extract codec name const codecMatch = match.match(/Subtitle: ([^(\n]+?)(?:\s*\(|$)/); const codecName = codecMatch ? codecMatch[1].trim() : 'unknown'; // Extract stream index const streamMatch = match.match(/Stream #\d+:(\d+)/); const streamIndex = streamMatch ? parseInt(streamMatch[1]) : undefined; // Extract language from parentheses after stream number const langMatch = match.match(/Stream #\d+:\d+\(([^)]+)\)/); const language = langMatch ? langMatch[1] : undefined; // Check for default and forced flags const isDefault = match.includes('(default)'); const isForced = match.includes('(forced)'); // Extract subtitle size from BPS (bits per second) or stream size let subtitleSize = 'unknown'; const bpsMatch = match.match(/BPS\s*:\s*(\d+)/); if (bpsMatch && duration !== 'unknown') { // Calculate size from bitrate and duration const bps = parseInt(bpsMatch[1]); const durationSeconds = parseInt(duration); const sizeBytes = Math.round((bps * durationSeconds) / 8); subtitleSize = sizeBytes.toString(); } else { // Try to find stream size in metadata tags const sizeMatch = match.match(/NUMBER_OF_BYTES[^\d]*(\d+)/); if (sizeMatch) { subtitleSize = sizeMatch[1]; } } subtitleStreams.push({ codec_type: 'subtitle', codec_name: codecName, language: language, default: isDefault, forced: isForced, index: streamIndex, size: subtitleSize }); }); } // Debug data logging removed for cleaner console output // Create comprehensive metadata structure with actual extracted data using discriminated unions const videoStream = { codec_type: 'video', codec_name: 
videoCodec, width: resolution !== 'unknown' ? parseInt(resolution.split('x')[0]) : 1280, height: resolution !== 'unknown' ? parseInt(resolution.split('x')[1]) : 720, r_frame_rate: `${fps}/1`, pix_fmt: 'yuv420p', bit_rate: '1500000', index: 0 // First video stream }; const audioStream = { codec_type: 'audio', codec_name: audioCodec, channels: 2, sample_rate: sampleRate, bit_rate: `${audioBitrate}000`, index: 1 // First audio stream }; // Convert subtitle streams to proper discriminated union type const typedSubtitleStreams = subtitleStreams.map(stream => ({ ...stream, codec_type: 'subtitle', index: stream.index || 0 })); const extractedMetadata = { format: { filename: file.name, size: fileSize.toString(), format_name: getFormatFromFileName(file.name), duration: duration, bit_rate: bitrate, fps: fps.toString(), movietimems: movietimems, movieframes: movieframes }, streams: [ videoStream, audioStream, ...typedSubtitleStreams ] }; console.log(`[FFMPEG DEBUG] Metadata extracted successfully for: ${file.name}`); setMetadata(extractedMetadata); // Clean up after successful processing console.log(`[FFMPEG DEBUG] Starting cleanup after successful processing`); await cleanupFFmpegFiles(); console.log(`[FFMPEG DEBUG] Final cleanup completed for: ${file.name}`); hideProgress(); } catch (err) { console.error(`[FFMPEG DEBUG] Processing error for ${file.name}:`, err); // Provide more specific error messages based on error type let errorMessage; if (err instanceof Error) { errorMessage = err.message; } else if (typeof err === 'string') { errorMessage = err; } else { errorMessage = 'Unknown error occurred while processing the video'; } // Add helpful context for common issues if (errorMessage.includes('out of memory') || errorMessage.includes('memory')) { errorMessage += '. Try using a smaller video file or refresh the page to clear memory.'; } else if (errorMessage.includes('network') || errorMessage.includes('fetch')) { errorMessage += '. 
Please check your internet connection and try again.'; } else if (errorMessage.includes('timeout')) { errorMessage += '. The file processing took too long. Try with a smaller file.'; } console.log(`[FFMPEG DEBUG] Showing error for ${file.name}: ${errorMessage}`); showError(errorMessage); hideProgress(); // Clean up after error console.log(`[FFMPEG DEBUG] Starting cleanup after error for: ${file.name}`); await cleanupFFmpegFiles(); console.log(`[FFMPEG DEBUG] Error cleanup completed for: ${file.name}`); } }, [isLoaded, showProgress, hideProgress, showError, processMP4File, cleanupFFmpegFiles]); // Load FFmpeg following the official example pattern useEffect(() => { const load = async () => { const ffmpeg = ffmpegRef.current; if (ffmpeg.loaded) { setIsLoaded(true); return; } try { // FFmpeg logging disabled for cleaner console output // Use the same pattern as the official example const baseURL = 'https://unpkg.com/@ffmpeg/core@0.12.6/dist/esm'; await ffmpeg.load({ coreURL: await cachedToBlobURL(`${baseURL}/ffmpeg-core.js`, 'text/javascript'), wasmURL: await cachedToBlobURL(`${baseURL}/ffmpeg-core.wasm`, 'application/wasm'), }); setIsLoaded(true); } catch (err) { console.error('Failed to load FFmpeg:', err); let errorMessage = 'Failed to load FFmpeg'; if (err instanceof Error) { const message = err.message.toLowerCase(); if (message.includes('network') || message.includes('fetch')) { errorMessage = 'Failed to load FFmpeg: Network error. Please check your internet connection and try refreshing the page.'; } else if (message.includes('wasm')) { errorMessage = 'Failed to load FFmpeg: WebAssembly not supported. Please use a modern browser (Chrome 57+, Firefox 52+, Safari 11+, Edge 79+).'; } else if (message.includes('cors')) { errorMessage = 'Failed to load FFmpeg: CORS error. 
Please refresh the page and try again.'; } else { errorMessage = `Failed to load FFmpeg: ${err.message}`; } } showError(errorMessage); } }; load(); }, [showError]); const extractSubtitle = useCallback(async (file, streamIndex, language, codecName, isForced) => { if (!isLoaded) return; try { console.log(`[QUICK EXTRACTION DEBUG] Starting quick extraction for: ${file.name}, streamIndex: ${streamIndex}, language: ${language}, codec: ${codecName}, forced: ${isForced}`); showProgress(`Preparing quick subtitle track ${streamIndex}...`, 10); await new Promise(resolve => setTimeout(resolve, 100)); // Clean up before extraction console.log(`[QUICK EXTRACTION DEBUG] Starting cleanup...`); await cleanupFFmpegFiles(); console.log(`[QUICK EXTRACTION DEBUG] Cleanup completed`); // Quick extraction strategy - use chunks for large files to support up to 5GB const fileSize = file.size; const extension = file.name.split('.').pop()?.toLowerCase(); let fileData; console.log(`[QUICK EXTRACTION DEBUG] File size: ${fileSize} bytes (${Math.round(fileSize / 1024 / 1024)}MB), extension: ${extension}`); // Generate proper filename based on movie name - add "quick" suffix const { filename: baseFilename, extension: outputExt } = generateSubtitleFilename(file.name, language, isForced, codecName); // Add "quick" suffix to distinguish from full extraction const outputFilename = baseFilename.replace(/(\.[^.]+)$/, '.quick$1'); console.log(`[QUICK EXTRACTION DEBUG] Generated filename: ${outputFilename}, output extension: ${outputExt}`); // Determine output format based on extension let outputFormat = outputExt; if (outputExt === 'vtt') { outputFormat = 'webvtt'; } console.log(`[QUICK EXTRACTION DEBUG] Output format: ${outputFormat}`); // Support files up to 50GB with progressive chunking const isVeryLargeFile = fileSize > 5 * 1024 * 1024 * 1024; // > 5GB // For massive files (>5GB), use progressive extraction instead of loading chunks into memory if (isVeryLargeFile) { console.log(`[QUICK EXTRACTION 
DEBUG] Very large file detected (${(fileSize / 1024 / 1024 / 1024).toFixed(1)}GB), using progressive chunk extraction`); try { showProgress(`Scanning large file for subtitles...`, 20); // Use progressive extraction for massive files const progressiveResult = await extractSubtitleFromMultipleChunks(ffmpegRef.current, file, streamIndex, outputFormat, outputFilename, (progress, text) => showProgress(text, progress)); if (progressiveResult && progressiveResult.length > 0) { console.log(`[QUICK EXTRACTION DEBUG] Progressive extraction successful, size: ${progressiveResult.length} bytes`); // Use complete progressive result for 100% subtitle extraction const completeData = progressiveResult; console.log(`[QUICK EXTRACTION DEBUG] Complete progressive data size: ${completeData.length} bytes`); if (completeData.length > 0) { const preview = safeDecodePreview(completeData, 200); console.log(`[QUICK EXTRACTION DEBUG] Progressive content preview:`, preview); } // Create download with complete data const blob = new Blob([completeData], { type: 'text/plain' }); const url = URL.createObjectURL(blob); const a = document.createElement('a'); a.href = url; a.download = outputFilename; document.body.appendChild(a); a.click(); document.body.removeChild(a); URL.revokeObjectURL(url); console.log(`[QUICK EXTRACTION DEBUG] Progressive download initiated successfully`); // Clean up after extraction await cleanupFFmpegFiles(); showProgress(`Quick extraction from large file completed! Complete subtitles (${completeData.length} bytes) downloaded.`, 100); return; } else { console.log(`[QUICK EXTRACTION DEBUG] Progressive extraction found no subtitles`); showProgress(`No subtitles found in large file chunks. Try the full extraction method.`, 100); await cleanupFFmpegFiles(); return; } } catch (progressiveError) { console.error(`[QUICK EXTRACTION DEBUG] Progressive extraction failed:`, progressiveError); showError(`Failed to extract from large file: ${progressiveError instanceof Error ? 
progressiveError.message : 'Unknown error'}`); await cleanupFFmpegFiles(); return; } } // Use unified chunked strategy for all file types to ensure consistent performance console.log(`[QUICK EXTRACTION DEBUG] Using unified chunked strategy for ${extension?.toUpperCase() || 'unknown'} file`); fileData = await createCompleteFileDataInChunks(file); console.log(`[QUICK EXTRACTION DEBUG] Complete file processing ready, size: ${fileData.size} bytes`); showProgress(`Quick extraction processing complete file (${Math.round(fileData.size / 1024 / 1024)}MB) with chunked strategy...`, 20); // Write file to FFmpeg virtual filesystem showProgress(`Loading for quick extraction...`, 30); await new Promise(resolve => setTimeout(resolve, 200)); console.log(`[QUICK EXTRACTION DEBUG] Writing file to FFmpeg virtual filesystem...`); await ffmpegRef.current.writeFile('input.video', await fetchFile(fileData)); console.log(`[QUICK EXTRACTION DEBUG] File write completed`); try { // Quick extraction - first try without time limit to see if subtitles exist showProgress(`Quick extracting subtitle track (${outputFormat} format)...`, 60); await new Promise(resolve => setTimeout(resolve, 200)); console.log(`[QUICK EXTRACTION DEBUG] Starting FFmpeg execution with parameters:`); const ffmpegArgs = [ '-i', 'input.video', '-map', `0:${streamIndex}`, '-c:s', outputFormat, outputFilename ]; console.log(`[QUICK EXTRACTION DEBUG] FFmpeg args (no time limit):`, ffmpegArgs); // Extract without time limit first to see if any subtitles exist in the chunk await ffmpegRef.current.exec(ffmpegArgs); console.log(`[QUICK EXTRACTION DEBUG] FFmpeg execution completed successfully`); } catch (extractError) { console.log(`[QUICK EXTRACTION DEBUG] Primary extraction failed, trying SRT fallback:`, extractError); // If native format fails, try converting to SRT without time limit outputFormat = 'srt'; const srtFilename = outputFilename.replace(/\.[^/.]+$/, '.srt'); console.log(`[QUICK EXTRACTION DEBUG] SRT fallback 
filename: ${srtFilename}`); showProgress(`Quick extracting subtitle track (SRT fallback)...`, 60); await new Promise(resolve => setTimeout(resolve, 200)); const srtArgs = [ '-i', 'input.video', '-map', `0:${streamIndex}`, '-c:s', 'srt', srtFilename ]; console.log(`[QUICK EXTRACTION DEBUG] SRT fallback args (no time limit):`, srtArgs); await ffmpegRef.current.exec(srtArgs); console.log(`[QUICK EXTRACTION DEBUG] SRT fallback execution completed`); // Check what files exist after SRT extraction try { const files = await ffmpegRef.current.listDir('/'); console.log(`[QUICK EXTRACTION DEBUG] Files after SRT extraction:`, files); const subtitleFiles = files.filter((file) => { const fileName = typeof file === 'string' ? file : file.name; return fileName && (fileName.includes('.srt') || fileName.includes('.ass') || fileName.includes('.vtt')); }); console.log(`[QUICK EXTRACTION DEBUG] Found subtitle files:`, subtitleFiles); } catch (listError) { console.warn(`[QUICK EXTRACTION DEBUG] Could not list files:`, listError); } // Read and process the extracted subtitle file showProgress(`Preparing quick subtitle download...`, 80); console.log(`[QUICK EXTRACTION DEBUG] Reading SRT file: ${srtFilename}`); try { const subtitleData = await ffmpegRef.current.readFile(srtFilename); console.log(`[QUICK EXTRACTION DEBUG] SRT file read successfully, size: ${subtitleData.length} bytes`); if (subtitleData.length > 0) { const preview = safeDecodePreview(subtitleData, 200); console.log(`[QUICK EXTRACTION DEBUG] SRT file content preview:`, preview); } // Use complete subtitle data for 100% extraction const completeData = subtitleData; console.log(`[QUICK EXTRACTION DEBUG] Complete SRT data size: ${completeData.length} bytes`); // Create download with complete data const blob = new Blob([completeData], { type: 'text/plain' }); const url = URL.createObjectURL(blob); const a = document.createElement('a'); a.href = url; a.download = srtFilename; document.body.appendChild(a); a.click(); 
document.body.removeChild(a); URL.revokeObjectURL(url); console.log(`[QUICK EXTRACTION DEBUG] SRT download initiated successfully`); // Clean up after extraction await cleanupFFmpegFiles(); hideProgress(); return; } catch (readError) { console.error(`[QUICK EXTRACTION DEBUG] Failed to read SRT file:`, readError); throw readError; } } // Check what files exist after primary extraction try { const files = await ffmpegRef.current.listDir('/'); console.log(`[QUICK EXTRACTION DEBUG] Files after primary extraction:`, files); const subtitleFiles = files.filter((file) => { const fileName = typeof file === 'string' ? file : file.name; return fileName && (fileName.includes('.srt') || fileName.includes('.ass') || fileName.includes('.vtt') || fileName.includes('.sub')); }); console.log(`[QUICK EXTRACTION DEBUG] Found subtitle files:`, subtitleFiles); } catch (listError) { console.warn(`[QUICK EXTRACTION DEBUG] Could not list files:`, listError); } // Read the extracted subtitle file and limit to ~300 bytes showProgress(`Preparing quick subtitle download...`, 80); console.log(`[QUICK EXTRACTION DEBUG] Reading primary file: ${outputFilename}`); try { const subtitleData = await ffmpegRef.current.readFile(outputFilename); console.log(`[QUICK EXTRACTION DEBUG] Primary file read successfully, size: ${subtitleData.length} bytes`); // If the extracted subtitle is empty, try extracting from middle chunk for MKV files if (subtitleData.length === 0 && (extension === 'mkv' || extension === 'webm') && fileSize > 200 * 1024 * 1024) { console.log(`[QUICK EXTRACTION DEBUG] Empty subtitle detected, trying middle chunk extraction...`); // Try extracting from middle portion of the file const middleStart = Math.floor(fileSize / 2); const middleChunk = file.slice(middleStart, middleStart + 100 * 1024 * 1024); // 100MB from middle showProgress(`Retrying with middle chunk (${Math.round(middleChunk.size / 1024 / 1024)}MB)...`, 70); // Clean up and try with middle chunk await cleanupFFmpegFiles(); 
await ffmpegRef.current.writeFile('input.video', await fetchFile(middleChunk)); const middleFilename = outputFilename.replace('.quick.', '.middle.'); console.log(`[QUICK EXTRACTION DEBUG] Trying middle extraction with filename: ${middleFilename}`); try { await ffmpegRef.current.exec([ '-i', 'input.video', '-map', `0:${streamIndex}`, '-c:s', outputFormat, middleFilename ]); const middleSubtitleData = await ffmpegRef.current.readFile(middleFilename); console.log(`[QUICK EXTRACTION DEBUG] Middle extraction completed, size: ${middleSubtitleData.length} bytes`); if (middleSubtitleData.length > 0) { const completeData = middleSubtitleData; console.log(`[QUICK EXTRACTION DEBUG] Using complete middle chunk data, size: ${completeData.length} bytes`); const preview = safeDecodePreview(completeData, 200); console.log(`[QUICK EXTRACTION DEBUG] Middle chunk content preview:`, preview); const blob = new Blob([completeData], { type: 'text/plain' }); const url = URL.createObjectURL(blob); const a = document.createElement('a'); a.href = url; a.download = outputFilename; document.body.appendChild(a); a.click(); document.body.removeChild(a); URL.revokeObjectURL(url); console.log(`[QUICK EXTRACTION DEBUG] Middle chunk download initiated successfully`); await cleanupFFmpegFiles(); showProgress(`Quick subtitle extraction completed! Complete subtitles from middle chunk (${completeData.length} bytes) downloaded.`, 100); return; } } catch (middleError) { console.log(`[QUICK EXTRACTION DEBUG] Middle chunk extraction failed:`, middleError); } } if (subtitleData.length > 0) { const preview = safeDecodePreview(su