@opensubtitles/video-metadata-extractor
Version:
A comprehensive NPM package for video metadata extraction and subtitle processing using FFmpeg WASM. Supports metadata extraction, individual subtitle extraction, batch subtitle extraction with ZIP downloads, and memory-safe processing of files of any size.
844 lines (843 loc) • 64.3 kB
JavaScript
/**
* Optimized and simplified video metadata hook
* Reduces code duplication and improves maintainability
*/
import { useState, useCallback, useEffect, useRef } from 'react';
import { FFmpeg } from '@ffmpeg/ffmpeg';
import { fetchFile } from '@ffmpeg/util';
import { cachedToBlobURL } from '../utils/ffmpegCache.js';
import JSZip from 'jszip';
import { PROCESSING_CONSTANTS, FFMPEG_CONSTANTS, ERROR_MESSAGES } from '../constants/index.js';
import { withRetry, validateFile, sleep, formatFileSize, generateSubtitleFilename, getFormatFromFileName } from '../utils/common.js';
import { createFileProcessor } from '../utils/fileProcessor.js';
export const useOptimizedVideoMetadata = () => {
// State management
// The FFmpeg instance lives in a ref so it survives re-renders. It is created
// lazily: `useRef(new FFmpeg())` would evaluate `new FFmpeg()` on every render
// and throw the result away on all but the first one.
const ffmpegRef = useRef(null);
if (ffmpegRef.current === null) {
    ffmpegRef.current = new FFmpeg();
}
// Flag ref initialised to false; it is not read or written in the visible part of this hook.
const initializingRef = useRef(false);
const [isLoaded, setIsLoaded] = useState(false);
// Parsed metadata of the last analysed file (null until an extraction succeeds)
const [metadata, setMetadata] = useState(null);
const [selectedFile, setSelectedFile] = useState(null);
// Human-readable label of the extraction strategy that produced `metadata`
const [currentMethod, setCurrentMethod] = useState('Unknown');
// Progress indicator state: visibility flag, percentage and label
const [progress, setProgress] = useState({
    isVisible: false,
    progress: 0,
    text: ''
});
// Error banner state: visibility flag and message
const [error, setError] = useState({
    isVisible: false,
    message: ''
});
// Progress management
/**
 * Makes the progress indicator visible with the given label.
 * @param {string} text - Label shown next to the indicator.
 * @param {number} [progressPercent=0] - Completion percentage to display.
 */
const showProgress = useCallback(function (text, progressPercent = 0) {
    const visibleProgress = {
        isVisible: true,
        progress: progressPercent,
        text: text
    };
    setProgress(visibleProgress);
}, []);
/**
 * Hides the progress indicator and resets its percentage and label.
 */
const hideProgress = useCallback(function () {
    const hiddenProgress = {
        isVisible: false,
        progress: 0,
        text: ''
    };
    setProgress(hiddenProgress);
}, []);
// Error management
/**
 * Makes the error banner visible with the given message.
 * @param {string} message - Text to display.
 */
const showError = useCallback(function (message) {
    const visibleError = {
        isVisible: true,
        message: message
    };
    setError(visibleError);
}, []);
/**
 * Hides the error banner and clears its message.
 */
const hideError = useCallback(function () {
    const clearedError = {
        isVisible: false,
        message: ''
    };
    setError(clearedError);
}, []);
// FFmpeg cleanup utility
/**
 * Best-effort purge of the FFmpeg virtual filesystem root.
 *
 * First removes every name listed in FFMPEG_CONSTANTS.TEMP_FILES, then every
 * remaining non-directory entry of '/' that is not in FFMPEG_CONSTANTS.SYSTEM_DIRS.
 * Every failure is swallowed on purpose: cleanup must never abort the caller.
 *
 * @returns {Promise<void>}
 */
const cleanupFFmpegFiles = useCallback(async () => {
    if (!ffmpegRef.current) {
        return;
    }
    // Delete one file with the configured retry policy; a delete that still fails is ignored
    const removeQuietly = (target) => withRetry(() => ffmpegRef.current.deleteFile(target), PROCESSING_CONSTANTS.RETRY.MAX_ATTEMPTS, PROCESSING_CONSTANTS.RETRY.BASE_DELAY).catch(() => { });
    try {
        await sleep(PROCESSING_CONSTANTS.TIMEOUTS.CLEANUP);
        // Pass 1: the well-known temporary file names
        for (const knownName of Object.values(FFMPEG_CONSTANTS.TEMP_FILES)) {
            await removeQuietly(knownName);
        }
        // Pass 2: whatever else is still sitting in the root directory
        try {
            const entries = await ffmpegRef.current.listDir('/');
            for (const entry of entries) {
                // listDir entries are handled both as plain names and as { name, isDir } objects
                const entryName = typeof entry === 'string' ? entry : entry.name;
                const entryIsDir = typeof entry === 'object' && entry.isDir;
                if (FFMPEG_CONSTANTS.SYSTEM_DIRS.has(entryName) || entryIsDir) {
                    continue;
                }
                await removeQuietly(entryName);
            }
        }
        catch (listError) {
            // Listing failed - skip the second pass
        }
        await sleep(300); // Final delay
    }
    catch (cleanupError) {
        // Cleanup failures shouldn't stop the main operation
    }
}, []);
// Parse metadata from FFmpeg logs
/**
 * Builds an ffprobe-like metadata object from the text FFmpeg logs for an input file.
 *
 * Only values that appear in the log are real: codec names, resolution, fps,
 * duration, container bitrate, audio sample rate and the subtitle stream list.
 * The video/audio entries are always emitted at indices 0 and 1, and their
 * `pix_fmt`, `bit_rate` and `channels` fields are fixed placeholder values.
 *
 * @param {string} logOutput - FFmpeg log lines joined with '\n'.
 * @param {{name: string, size: number}} file - Source file; only `name` and `size` are read.
 * @returns {{format: object, streams: object[]}} Metadata with `format` and `streams`.
 * @throws {Error} When the log reports an unreadable input, or contains no stream line.
 */
const parseMetadataFromLogs = useCallback((logOutput, file) => {
    // Error checking
    const fatalMarkers = [
        'Invalid data found when processing input',
        'No such file or directory',
        'Operation not permitted'
    ];
    if (fatalMarkers.some((marker) => logOutput.includes(marker))) {
        throw new Error(`${ERROR_MESSAGES.FILE.CORRUPTED}: ${file.name}`);
    }
    if (!logOutput.includes('Stream #')) {
        throw new Error(ERROR_MESSAGES.SUBTITLE.NO_TRACKS);
    }
    // Parse video stream info
    const videoStreamMatch = logOutput.match(/Stream.*Video: ([^,]+)[^,]*,.*?(\d+x\d+)[^,]*,.*?(\d+\.?\d*) fps/);
    const videoCodec = videoStreamMatch ? videoStreamMatch[1] : 'unknown';
    const resolution = videoStreamMatch ? videoStreamMatch[2] : 'unknown';
    const fps = videoStreamMatch ? parseFloat(videoStreamMatch[3]) : 25;
    // Parse duration (HH:MM:SS.cc); the values stay 'unknown' when FFmpeg prints none
    const durationMatch = logOutput.match(/Duration: (\d{2}):(\d{2}):(\d{2})\.(\d{2})/);
    let duration = 'unknown';
    let movietimems = 'unknown';
    let movieframes = 'unknown';
    if (durationMatch) {
        const [hours, minutes, seconds, centiseconds] = durationMatch
            .slice(1)
            .map((part) => parseInt(part, 10));
        const totalSeconds = hours * 3600 + minutes * 60 + seconds;
        const totalMilliseconds = totalSeconds * 1000 + centiseconds * 10;
        duration = totalSeconds.toString();
        movietimems = totalMilliseconds.toString();
        movieframes = Math.round(totalMilliseconds / 1000 * fps).toString();
    }
    // Parse bitrate (logged in kb/s, reported in bit/s)
    const bitrateMatch = logOutput.match(/bitrate: (\d+) kb\/s/);
    const bitrate = bitrateMatch ? (parseInt(bitrateMatch[1], 10) * 1000).toString() : 'unknown';
    // Parse audio stream info
    const audioStreamMatch = logOutput.match(/Stream.*Audio: ([^,]+)[^,]*,.*?(\d+) Hz/);
    const audioCodec = audioStreamMatch ? audioStreamMatch[1] : 'unknown';
    const sampleRate = audioStreamMatch ? audioStreamMatch[2] : '48000';
    // Parse subtitle streams, one log line each:
    //   Stream #<file>:<index>[<id>](<lang>): Subtitle: <codec>[ (...)][, details]
    // The "[<id>]" part is optional and must be tolerated, otherwise every
    // subtitle track of a container that prints stream ids is silently skipped.
    // The codec name stops at the first '(' or ',' so trailing details such as
    // ", 720x576" do not end up in codec_name.
    const subtitleStreams = [];
    const subtitleLinePattern = /Stream #\d+:(\d+)(?:\[[^\]]*\])?(?:\(([^)]*)\))?: Subtitle: ([^(,\n]+)[^\n]*/g;
    for (const [line, indexText, languageTag, rawCodec] of logOutput.matchAll(subtitleLinePattern)) {
        subtitleStreams.push({
            codec_type: 'subtitle',
            codec_name: rawCodec.trim() || 'unknown',
            language: languageTag || undefined,
            default: line.includes('(default)'),
            forced: line.includes('(forced)'),
            index: parseInt(indexText, 10)
        });
    }
    // Create metadata structure (1280x720 is the fallback when no resolution was logged)
    const [width, height] = resolution !== 'unknown'
        ? resolution.split('x').map((side) => parseInt(side, 10))
        : [1280, 720];
    const videoStream = {
        codec_type: 'video',
        codec_name: videoCodec,
        width,
        height,
        r_frame_rate: `${fps}/1`,
        pix_fmt: 'yuv420p',
        bit_rate: '1500000',
        index: 0
    };
    const audioStream = {
        codec_type: 'audio',
        codec_name: audioCodec,
        channels: 2,
        sample_rate: sampleRate,
        bit_rate: '128000',
        index: 1
    };
    return {
        format: {
            filename: file.name,
            size: file.size.toString(),
            format_name: getFormatFromFileName(file.name),
            duration,
            bit_rate: bitrate,
            fps: fps.toString(),
            movietimems,
            movieframes
        },
        streams: [videoStream, audioStream, ...subtitleStreams]
    };
}, []);
// Extract metadata from file
/**
 * Loads `file` (or a leading slice of it) into the FFmpeg filesystem, runs
 * `ffmpeg -i` so FFmpeg logs the container/stream description, and stores the
 * parsed result in hook state (`metadata`, `selectedFile`, `currentMethod`).
 *
 * Slice policy: names ending in `.mkv`/`.webm` use the first
 * CHUNK_SIZES.MKV_METADATA_CHUNK bytes, other files above 5GB use the first
 * 1MB, everything else is loaded completely through the file processor.
 *
 * @param {File} file - The user-selected media file.
 * @returns {Promise<void>} Resolves without doing anything when FFmpeg is not loaded.
 * @throws {Error} On validation failure; processing errors are reported through
 *   `showError` and then re-thrown.
 */
const extractMetadata = useCallback(async (file) => {
    if (!ffmpegRef.current.loaded)
        return;
    // Validate file
    const validation = validateFile(file);
    if (!validation.isValid) {
        throw new Error(validation.errors.join('; '));
    }
    // Race `operation` against a timeout and always clear the timer afterwards,
    // so no pending timer is left behind per write attempt / exec call.
    const withTimeout = async (operation, timeoutMs) => {
        let timerId;
        const timeoutPromise = new Promise((_, reject) => {
            timerId = setTimeout(() => reject(new Error(ERROR_MESSAGES.FFMPEG.TIMEOUT)), timeoutMs);
        });
        try {
            return await Promise.race([operation, timeoutPromise]);
        }
        finally {
            clearTimeout(timerId);
        }
    };
    try {
        showProgress('Processing video...', 10);
        await cleanupFFmpegFiles();
        await sleep(500);
        let fileData;
        // Check if it's an MKV/WebM file
        const lowerName = file.name.toLowerCase();
        const isMKV = lowerName.endsWith('.mkv') || lowerName.endsWith('.webm');
        if (isMKV) {
            // For MKV files, always use the fixed metadata chunk regardless of file size
            console.log(`[METADATA EXTRACTION] MKV file detected (${formatFileSize(file.size)}), using 10MB chunk extraction`);
            showProgress(`MKV file detected - reading first 10MB for metadata...`, 25);
            fileData = file.slice(0, PROCESSING_CONSTANTS.CHUNK_SIZES.MKV_METADATA_CHUNK);
            console.log(`[METADATA EXTRACTION] Reading first ${formatFileSize(PROCESSING_CONSTANTS.CHUNK_SIZES.MKV_METADATA_CHUNK)} of MKV file for metadata`);
        }
        else {
            const LARGE_FILE_THRESHOLD = 5 * 1024 * 1024 * 1024; // 5GB
            const HEADER_SIZE = 1 * 1024 * 1024; // 1MB header should contain metadata
            if (file.size > LARGE_FILE_THRESHOLD) {
                console.log(`[METADATA EXTRACTION] Large file detected (${formatFileSize(file.size)}), using header-only extraction`);
                showProgress(`Large file detected (${formatFileSize(file.size)}) - reading header only...`, 25);
                fileData = file.slice(0, HEADER_SIZE);
                console.log(`[METADATA EXTRACTION] Reading first ${formatFileSize(HEADER_SIZE)} of file for metadata`);
            }
            else {
                console.log(`[METADATA EXTRACTION] Normal file size (${formatFileSize(file.size)}), using complete file processing`);
                showProgress('Loading file...', 30);
                // Process complete file for smaller files
                const fileProcessor = createFileProcessor({
                    chunkSize: PROCESSING_CONSTANTS.CHUNK_SIZES.COMPLETE_FILE,
                    debug: false
                });
                fileData = await fileProcessor.processCompleteFile(file);
            }
        }
        showProgress('Loading into FFmpeg...', 50);
        // Write file with timeout and retry
        await withRetry(async () => {
            try {
                await ffmpegRef.current.deleteFile(FFMPEG_CONSTANTS.TEMP_FILES.INPUT);
            }
            catch (e) { /* ignore: the input file may simply not exist yet */ }
            await withTimeout(ffmpegRef.current.writeFile(FFMPEG_CONSTANTS.TEMP_FILES.INPUT, await fetchFile(fileData)), PROCESSING_CONSTANTS.TIMEOUTS.FILE_WRITE);
        }, PROCESSING_CONSTANTS.RETRY.FILE_WRITE_ATTEMPTS);
        showProgress('Extracting metadata...', 70);
        // Capture FFmpeg log output
        const ffmpegLogs = [];
        const logHandler = ({ message }) => {
            ffmpegLogs.push(message);
        };
        ffmpegRef.current.on('log', logHandler);
        try {
            await withTimeout(ffmpegRef.current.exec(['-i', FFMPEG_CONSTANTS.TEMP_FILES.INPUT]), PROCESSING_CONSTANTS.TIMEOUTS.FFMPEG_EXECUTION);
        }
        catch (ffmpegError) {
            // Expected error for info extraction: `-i` without an output is a failing command.
            // A timeout is swallowed here as well; the captured logs decide what happens next.
        }
        finally {
            ffmpegRef.current.off('log', logHandler);
        }
        if (ffmpegLogs.length === 0) {
            throw new Error('FFmpeg did not produce any output. The file might be corrupted.');
        }
        showProgress('Parsing metadata...', 90);
        const parsedMetadata = parseMetadataFromLogs(ffmpegLogs.join('\n'), file);
        setMetadata(parsedMetadata);
        setSelectedFile(file);
        setCurrentMethod('Optimized Metadata Extraction');
        await cleanupFFmpegFiles();
        hideProgress();
    }
    catch (error) {
        await cleanupFFmpegFiles();
        const errorMessage = error instanceof Error ? error.message : 'Failed to extract metadata';
        showError(errorMessage);
        hideProgress();
        throw error;
    }
}, [showProgress, hideProgress, showError, cleanupFFmpegFiles, parseMetadataFromLogs]);
// Progressive subtitle extraction for large files - collect from ALL chunks
/**
 * Samples fixed-size slices of `file` at several offsets, runs an FFmpeg
 * subtitle extraction on each slice, and concatenates every non-empty output.
 *
 * @param {File|Blob} file - Source media; only `size`, `name` and `slice()` are used here.
 * @param {number} streamIndex - Stream number placed into `-map 0:<streamIndex>`.
 * @param {string} outputFormat - Value passed to `-c:s`; 'copy' disables the raw-copy fallback.
 * @param {string} outputFilename - Base name; each slice writes `chunk_<i>_<outputFilename>`.
 * @returns {Promise<Uint8Array>} The collected subtitle bytes, or UTF-8 placeholder text when
 *   no slice produced output. With exactly one hit the value returned by `readFile` is passed
 *   through as-is (presumably a Uint8Array - TODO confirm).
 *
 * NOTE(review): per-slice outputs are joined byte-wise; nothing here merges headers or
 * renumbers cues between slices.
 * NOTE(review): the dependency array lists only `showProgress` although the body also
 * calls `cleanupFFmpegFiles`.
 */
const extractSubtitleFromMultipleChunks = useCallback(async (file, streamIndex, outputFormat, outputFilename) => {
const fileSize = file.size;
const chunkSize = 8 * 1024 * 1024; // 8MB chunks for progressive extraction (reduced further to avoid FS errors)
const maxChunks = 15; // Increase to 15 chunks with smaller size to cover more of the file
console.log(`[PROGRESSIVE EXTRACTION] Starting progressive extraction for ${formatFileSize(fileSize)} file`);
// Calculate strategic chunk positions - focus on areas where subtitles are likely
const totalChunks = Math.min(maxChunks, Math.ceil(fileSize / (200 * 1024 * 1024))); // One chunk per 200MB
const chunkPositions = [];
// Add beginning of file (metadata area)
chunkPositions.push(0);
// Add positions throughout the file
for (let i = 1; i < totalChunks; i++) {
const position = Math.floor((fileSize / totalChunks) * i);
chunkPositions.push(position);
}
// Add end of file (often contains subtitle data in MKV files)
if (fileSize > chunkSize * 2) {
// Never start the tail slice before the end of the previous slice
const endPosition = Math.max(fileSize - chunkSize, chunkPositions[chunkPositions.length - 1] + chunkSize);
if (!chunkPositions.includes(endPosition)) {
chunkPositions.push(endPosition);
}
}
console.log(`[PROGRESSIVE EXTRACTION] Will try ${chunkPositions.length} chunk positions:`, chunkPositions.map(p => `${formatFileSize(p)}`));
// Collect subtitle data from ALL chunks that work
const collectedSubtitleData = [];
let successfulChunks = 0;
let failedChunks = 0;
// Try EVERY chunk and collect all subtitle data
for (let i = 0; i < chunkPositions.length; i++) {
const position = chunkPositions[i];
const chunkEnd = Math.min(position + chunkSize, fileSize);
const chunk = file.slice(position, chunkEnd);
// Progress is mapped onto the 20%..90% range across all slices
showProgress(`Processing chunk ${i + 1}/${chunkPositions.length} at ${formatFileSize(position)}...`, 20 + (i / chunkPositions.length) * 70);
try {
console.log(`[PROGRESSIVE EXTRACTION] Processing chunk ${i + 1} at position ${formatFileSize(position)}, size: ${formatFileSize(chunk.size)}`);
// More aggressive cleanup before processing this chunk
await cleanupFFmpegFiles();
await sleep(100); // Brief pause for cleanup
try {
// Load chunk with better error handling
console.log(`[PROGRESSIVE EXTRACTION] Writing chunk ${i + 1} to FFmpeg FS...`);
await ffmpegRef.current.writeFile(FFMPEG_CONSTANTS.TEMP_FILES.INPUT, await fetchFile(chunk));
console.log(`[PROGRESSIVE EXTRACTION] Chunk ${i + 1} loaded successfully`);
}
catch (writeError) {
console.error(`[PROGRESSIVE EXTRACTION] Failed to write chunk ${i + 1} to FFmpeg FS:`, writeError);
throw writeError;
}
// Try extraction
let chunkFilename = `chunk_${i}_${outputFilename}`;
console.log(`[PROGRESSIVE EXTRACTION] Executing FFmpeg for chunk ${i + 1}...`);
// NOTE(review): the fallback below only runs if exec() rejects. Confirm that the FFmpeg
// wrapper rejects on a failed command rather than resolving with a non-zero exit code.
try {
console.log(`[PROGRESSIVE EXTRACTION] Attempting extraction with format: ${outputFormat}`);
await ffmpegRef.current.exec([
'-i', FFMPEG_CONSTANTS.TEMP_FILES.INPUT,
'-map', `0:${streamIndex}`,
'-c:s', outputFormat,
chunkFilename
]);
console.log(`[PROGRESSIVE EXTRACTION] FFmpeg execution completed for chunk ${i + 1} with format ${outputFormat}`);
}
catch (execError) {
console.log(`[PROGRESSIVE EXTRACTION] Extraction with format ${outputFormat} failed for chunk ${i + 1}, error:`, execError);
// If the requested format failed and it's not already 'copy', try raw extraction
if (outputFormat !== 'copy') {
try {
console.log(`[PROGRESSIVE EXTRACTION] Trying raw subtitle extraction (copy) for chunk ${i + 1}...`);
// Only .srt/.vtt/.ass names are renamed to .txt; any other extension is kept unchanged
const rawFilename = chunkFilename.replace(/\.(srt|vtt|ass)$/, '.txt');
await ffmpegRef.current.exec([
'-i', FFMPEG_CONSTANTS.TEMP_FILES.INPUT,
'-map', `0:${streamIndex}`,
'-c:s', 'copy', // Copy raw subtitle stream
rawFilename
]);
console.log(`[PROGRESSIVE EXTRACTION] Raw subtitle extraction completed for chunk ${i + 1}`);
// Update filename for the rest of the processing
chunkFilename = rawFilename;
}
catch (rawError) {
console.error(`[PROGRESSIVE EXTRACTION] Both ${outputFormat} and copy extraction failed for chunk ${i + 1}:`, rawError);
throw rawError;
}
}
else {
// Already tried copy, so just throw the error
throw execError;
}
}
// Check if output file exists and try to read it
try {
console.log(`[PROGRESSIVE EXTRACTION] Checking if output file exists: ${chunkFilename}`);
// First, list files to see what's actually in the FFmpeg FS
try {
const files = await ffmpegRef.current.listDir('/');
console.log(`[PROGRESSIVE EXTRACTION] Files in FFmpeg FS after chunk ${i + 1}:`, files.map(f => typeof f === 'string' ? f : f.name));
const outputFileExists = files.some(f => {
const fileName = typeof f === 'string' ? f : f.name;
return fileName === chunkFilename;
});
if (!outputFileExists) {
console.log(`[PROGRESSIVE EXTRACTION] Output file ${chunkFilename} was not created by FFmpeg for chunk ${i + 1} - likely no subtitle data in this chunk`);
// This is normal - not all chunks will have subtitle data
// Note: this `continue` also skips the inter-chunk delay at the bottom of the loop body
continue; // Skip to next chunk
}
console.log(`[PROGRESSIVE EXTRACTION] Output file ${chunkFilename} exists, attempting to read...`);
}
catch (listError) {
console.warn(`[PROGRESSIVE EXTRACTION] Could not list FFmpeg FS files:`, listError);
// Continue anyway and try to read the file
}
const subtitleData = await ffmpegRef.current.readFile(chunkFilename);
// Only non-empty outputs count as a successful chunk
if (subtitleData.length > 0) {
console.log(`[PROGRESSIVE EXTRACTION] Found subtitles in chunk ${i + 1}! Size: ${subtitleData.length} bytes`);
collectedSubtitleData.push(subtitleData);
successfulChunks++;
}
else {
console.log(`[PROGRESSIVE EXTRACTION] Chunk ${i + 1} processed but output file is empty`);
}
// Clean up chunk file
try {
await ffmpegRef.current.deleteFile(chunkFilename);
console.log(`[PROGRESSIVE EXTRACTION] Cleaned up output file ${chunkFilename}`);
}
catch (cleanupError) {
console.warn(`[PROGRESSIVE EXTRACTION] Failed to clean up chunk file ${chunkFilename}:`, cleanupError);
}
}
catch (readError) {
console.error(`[PROGRESSIVE EXTRACTION] Failed to read result from chunk ${i + 1}:`, {
error: readError,
errorName: readError instanceof Error ? readError.name : 'Unknown',
errorMessage: readError instanceof Error ? readError.message : String(readError),
fileName: chunkFilename
});
// If we can't read the file, it might not exist (which is normal for chunks without subtitles)
// Don't throw error, just continue to next chunk
console.log(`[PROGRESSIVE EXTRACTION] Continuing to next chunk after read failure...`);
}
// Add a small delay between chunks to help with memory management
if (i < chunkPositions.length - 1) {
await sleep(200);
}
}
catch (chunkError) {
// Write or exec failures land here; the chunk is counted as failed and the loop goes on
console.error(`[PROGRESSIVE EXTRACTION] Chunk ${i + 1} failed with detailed error:`, {
error: chunkError,
errorName: chunkError instanceof Error ? chunkError.name : 'Unknown',
errorMessage: chunkError instanceof Error ? chunkError.message : String(chunkError),
errorStack: chunkError instanceof Error ? chunkError.stack : undefined,
chunkPosition: formatFileSize(position),
chunkSize: formatFileSize(chunk.size)
});
failedChunks++;
// Try to clean up any partial files from the failed chunk
try {
await cleanupFFmpegFiles();
}
catch (cleanupErr) {
console.warn(`[PROGRESSIVE EXTRACTION] Cleanup after chunk ${i + 1} failure also failed:`, cleanupErr);
}
// Continue to next chunk - don't give up!
}
}
console.log(`[PROGRESSIVE EXTRACTION] Completed processing all chunks. Successful: ${successfulChunks}, Failed: ${failedChunks}`);
// Combine all collected subtitle data
if (collectedSubtitleData.length > 0) {
console.log(`[PROGRESSIVE EXTRACTION] Combining ${collectedSubtitleData.length} subtitle data chunks`);
if (collectedSubtitleData.length === 1) {
// Only one chunk has data, return it directly
return collectedSubtitleData[0];
}
else {
// Multiple chunks have data, combine them
// Plain byte concatenation in chunk order
const totalLength = collectedSubtitleData.reduce((sum, data) => sum + data.length, 0);
const combinedData = new Uint8Array(totalLength);
let offset = 0;
for (const data of collectedSubtitleData) {
combinedData.set(data, offset);
offset += data.length;
}
console.log(`[PROGRESSIVE EXTRACTION] Combined ${collectedSubtitleData.length} chunks into ${formatFileSize(combinedData.length)} of subtitle data`);
return combinedData;
}
}
// Even if no subtitle data was found, let's create a placeholder file
// The placeholder is explanatory text, so the function never returns an empty/falsy value
console.log(`[PROGRESSIVE EXTRACTION] No subtitle data found in any chunk, creating placeholder`);
const placeholderText = `No subtitle data found in the file.\n\nThis may be because:\n- The subtitle stream is embedded differently than expected\n- The file uses a subtitle format not supported by this extraction method\n- The subtitle data is distributed in a way that wasn't captured\n\nFile: ${file.name}\nStream: ${streamIndex}\nChunks processed: ${chunkPositions.length}\nSuccessful chunks: ${successfulChunks}\nFailed chunks: ${failedChunks}`;
return new TextEncoder().encode(placeholderText);
}, [showProgress]);
// Extract single subtitle
/**
 * Extracts one subtitle stream from `file` and offers it to the user as a download.
 *
 * Files above 5GB are handled by `extractSubtitleFromMultipleChunks`; in that
 * path a text file is always downloaded (subtitles, placeholder text or error
 * text). Smaller files are loaded completely, the requested stream is probed,
 * and extraction is attempted as: requested format, then SRT, then raw copy.
 *
 * @param {File} file - Source media file.
 * @param {number} streamIndex - Stream number used in `-map 0:<streamIndex>`.
 * @param {string} [language] - Passed to `generateSubtitleFilename`.
 * @param {string} [codecName] - Passed to `generateSubtitleFilename`.
 * @param {boolean} [isForced] - Passed to `generateSubtitleFilename`.
 * @returns {Promise<void>} Failures are reported through `showError` and are not re-thrown.
 *   Resolves without doing anything when FFmpeg is not loaded.
 */
const extractSubtitle = useCallback(async (file, streamIndex, language, codecName, isForced) => {
    if (!ffmpegRef.current.loaded)
        return;
    // Offer `bytes` to the user as a plain-text download named `downloadName`
    const downloadBytes = (bytes, downloadName) => {
        const blob = new Blob([bytes], { type: 'text/plain' });
        const url = URL.createObjectURL(blob);
        const a = document.createElement('a');
        a.href = url;
        a.download = downloadName;
        document.body.appendChild(a);
        a.click();
        document.body.removeChild(a);
        URL.revokeObjectURL(url);
    };
    // The probing step below may substitute another index; the parameter itself stays untouched
    let activeStreamIndex = streamIndex;
    try {
        console.log(`[SUBTITLE EXTRACTION] Starting extraction for file: ${file.name} (${formatFileSize(file.size)}), stream: ${streamIndex}`);
        showProgress(`Extracting subtitle track ${streamIndex}...`, 10);
        // Aggressive cleanup before subtitle extraction to prevent filesystem contamination
        console.log(`[SUBTITLE EXTRACTION] Performing aggressive filesystem cleanup...`);
        await cleanupFFmpegFiles();
        // Additional cleanup: manually remove any lingering files
        try {
            const allFiles = await ffmpegRef.current.listDir('/');
            console.log(`[SUBTITLE EXTRACTION] Found ${allFiles.length} files in FFmpeg filesystem before cleanup`);
            for (const fileInfo of allFiles) {
                // Accept both plain-name and { name, isDir } entries, like cleanupFFmpegFiles does
                const entryName = typeof fileInfo === 'string' ? fileInfo : fileInfo.name;
                const entryIsDir = typeof fileInfo === 'object' && fileInfo.isDir;
                if (entryName && !FFMPEG_CONSTANTS.SYSTEM_DIRS.has(entryName) && !entryIsDir) {
                    try {
                        await ffmpegRef.current.deleteFile(entryName);
                        console.log(`[SUBTITLE EXTRACTION] Manually deleted lingering file: ${entryName}`);
                    }
                    catch (deleteError) {
                        console.warn(`[SUBTITLE EXTRACTION] Could not delete ${entryName}:`, deleteError);
                    }
                }
            }
            const remainingFiles = await ffmpegRef.current.listDir('/');
            console.log(`[SUBTITLE EXTRACTION] Filesystem cleaned, ${remainingFiles.length} files remaining`);
        }
        catch (listError) {
            console.warn(`[SUBTITLE EXTRACTION] Could not perform manual cleanup:`, listError);
        }
        // Generate filename for text-based subtitles
        const generated = generateSubtitleFilename(file.name, language, isForced, codecName);
        let outputFilename = generated.filename;
        const outputExt = generated.extension;
        // FFmpeg's encoder is called 'webvtt' while the file extension is 'vtt'
        const outputFormat = outputExt === 'vtt' ? 'webvtt' : outputExt;
        // For very large files (>5GB), use progressive extraction like the working useVideoMetadata hook
        const LARGE_FILE_THRESHOLD = 5 * 1024 * 1024 * 1024; // 5GB
        const isVeryLargeFile = file.size > LARGE_FILE_THRESHOLD;
        if (isVeryLargeFile) {
            console.log(`[SUBTITLE EXTRACTION] Very large file detected (${formatFileSize(file.size)}), analyzing subtitle format...`);
            try {
                // For text-based subtitles in large files, use progressive chunk extraction
                showProgress(`Scanning large file for text subtitles...`, 20);
                let payload = await extractSubtitleFromMultipleChunks(file, activeStreamIndex, outputFormat, outputFilename);
                if (payload) {
                    console.log(`[SUBTITLE EXTRACTION] Progressive extraction completed, size: ${payload.length} bytes`);
                }
                else {
                    // Defensive only: the progressive extractor returns placeholder text when it finds nothing
                    console.log(`[SUBTITLE EXTRACTION] Unexpected: no result from progressive extraction`);
                    payload = new TextEncoder().encode('Progressive extraction completed but returned no data.');
                }
                // Create download with the result (could be actual subtitles or placeholder info)
                downloadBytes(payload, outputFilename);
                await cleanupFFmpegFiles();
                showProgress(`Extraction completed! Downloaded ${formatFileSize(payload.length)}`, 100);
                // Auto-hide progress after 3 seconds
                setTimeout(hideProgress, 3000);
                return;
            }
            catch (progressiveError) {
                console.error(`[SUBTITLE EXTRACTION] Progressive extraction had an error:`, progressiveError);
                // Even if there was an error, provide a download with error info
                const errorText = `Progressive extraction encountered an error.\n\nError: ${progressiveError instanceof Error ? progressiveError.message : 'Unknown error'}\n\nFile: ${file.name}\nStream: ${streamIndex}\n\nThis may be due to:\n- File format compatibility issues\n- Memory limitations\n- Unsupported subtitle encoding\n\nTry using a different extraction method or a video processing tool.`;
                const errorData = new TextEncoder().encode(errorText);
                downloadBytes(errorData, outputFilename);
                await cleanupFFmpegFiles();
                showProgress(`Extraction completed with error info! Downloaded ${formatFileSize(errorData.length)}`, 100);
                setTimeout(hideProgress, 3000);
                return;
            }
        }
        // For subtitle extraction, we need the complete file regardless of format
        // because subtitle data may be distributed throughout the file
        console.log(`[SUBTITLE EXTRACTION] Loading complete file for subtitle extraction (${formatFileSize(file.size)})`);
        const fileProcessor = createFileProcessor({
            chunkSize: PROCESSING_CONSTANTS.CHUNK_SIZES.COMPLETE_FILE,
            debug: false
        });
        showProgress('Loading complete file for subtitle extraction...', 30);
        console.log(`[SUBTITLE EXTRACTION] About to process file: ${file.name}, accessible: ${file instanceof File}`);
        let fileData;
        try {
            fileData = await fileProcessor.processCompleteFile(file);
            console.log(`[SUBTITLE EXTRACTION] File processed successfully, data size: ${formatFileSize(fileData.size)}`);
        }
        catch (fileProcessError) {
            console.error(`[SUBTITLE EXTRACTION] Failed to process file: ${file.name}`, fileProcessError);
            throw new Error(`File processing failed: ${fileProcessError instanceof Error ? fileProcessError.message : 'Unknown error'}`);
        }
        showProgress('Loading into FFmpeg...', 50);
        await ffmpegRef.current.writeFile(FFMPEG_CONSTANTS.TEMP_FILES.INPUT, await fetchFile(fileData));
        // Debug: Check what files exist in FFmpeg filesystem
        try {
            const files = await ffmpegRef.current.listDir('/');
            console.log(`[SUBTITLE EXTRACTION] FFmpeg filesystem contents:`, files);
        }
        catch (listError) {
            console.warn(`[SUBTITLE EXTRACTION] Could not list FFmpeg filesystem:`, listError);
        }
        // Validate that the requested stream exists in the loaded file.
        // NOTE(review): every try/catch around exec() in this function only reacts when
        // exec() rejects. Confirm that the FFmpeg wrapper rejects on a failed command
        // rather than resolving with a non-zero exit code.
        try {
            console.log(`[SUBTITLE EXTRACTION] Validating stream ${streamIndex} exists in loaded file...`);
            await ffmpegRef.current.exec([
                '-i', FFMPEG_CONSTANTS.TEMP_FILES.INPUT,
                '-map', `0:${activeStreamIndex}`,
                '-t', '0.1', // Only process 0.1 seconds
                '-f', 'null',
                '-'
            ]);
            console.log(`[SUBTITLE EXTRACTION] Stream ${streamIndex} validation successful`);
        }
        catch (validationError) {
            console.error(`[SUBTITLE EXTRACTION] Stream ${streamIndex} validation failed:`, validationError);
            console.log(`[SUBTITLE EXTRACTION] Stream ${streamIndex} not found, analyzing complete file for correct indices...`);
            try {
                showProgress('Re-analyzing complete file for accurate stream mapping...', 60);
                console.log(`[SUBTITLE EXTRACTION] Attempting to find alternative subtitle stream indices...`);
                let foundWorkingStream = false;
                const maxStreamIndex = 20; // Exclusive upper bound: indices 0..19 are probed
                for (let testIndex = 0; testIndex < maxStreamIndex; testIndex++) {
                    try {
                        console.log(`[SUBTITLE EXTRACTION] Testing stream index ${testIndex}...`);
                        await ffmpegRef.current.exec([
                            '-i', FFMPEG_CONSTANTS.TEMP_FILES.INPUT,
                            '-map', `0:${testIndex}`,
                            '-t', '0.1',
                            '-f', 'null',
                            '-'
                        ]);
                        // The first index that can be mapped is taken, whatever its codec type:
                        // nothing here checks that it is actually a subtitle stream.
                        console.log(`[SUBTITLE EXTRACTION] Found working stream at index ${testIndex}, using this instead of ${streamIndex}`);
                        activeStreamIndex = testIndex;
                        foundWorkingStream = true;
                        break;
                    }
                    catch {
                        // Stream doesn't exist or isn't accessible, try next
                        continue;
                    }
                }
                if (!foundWorkingStream) {
                    throw new Error(`No accessible subtitle streams found in indices 0-${maxStreamIndex - 1}. The file may not contain subtitle tracks or they may be in a different format.`);
                }
            }
            catch (reanalysisError) {
                console.error(`[SUBTITLE EXTRACTION] Complete file re-analysis failed:`, reanalysisError);
                throw new Error(`Stream index ${streamIndex} not found and could not re-analyze file: ${reanalysisError instanceof Error ? reanalysisError.message : 'Unknown error'}`);
            }
        }
        showProgress(`Extracting subtitle (${outputFormat} format)...`, 70);
        // Extract subtitle: requested format first, then SRT, then raw copy
        try {
            console.log(`[SUBTITLE EXTRACTION] Attempting extraction with format: ${outputFormat}`);
            await ffmpegRef.current.exec([
                '-i', FFMPEG_CONSTANTS.TEMP_FILES.INPUT,
                '-map', `0:${activeStreamIndex}`,
                '-c:s', outputFormat,
                outputFilename
            ]);
            console.log(`[SUBTITLE EXTRACTION] Extraction completed with format: ${outputFormat}`);
        }
        catch (extractError) {
            console.error(`[SUBTITLE EXTRACTION] Extraction with ${outputFormat} failed:`, extractError);
            console.log(`[SUBTITLE EXTRACTION] Error details:`, {
                errorName: extractError instanceof Error ? extractError.name : 'Unknown',
                errorMessage: extractError instanceof Error ? extractError.message : String(extractError),
                streamIndex: activeStreamIndex,
                outputFormat,
                outputFilename
            });
            console.log(`[SUBTITLE EXTRACTION] Trying fallback...`);
            if (outputFormat !== 'copy') {
                // Try SRT fallback for text formats
                try {
                    const srtFilename = outputFilename.replace(/\.[^/.]+$/, '.srt');
                    console.log(`[SUBTITLE EXTRACTION] Trying SRT fallback: ${srtFilename}`);
                    await ffmpegRef.current.exec([
                        '-i', FFMPEG_CONSTANTS.TEMP_FILES.INPUT,
                        '-map', `0:${activeStreamIndex}`,
                        '-c:s', 'srt',
                        srtFilename
                    ]);
                    outputFilename = srtFilename; // Update filename for download
                    console.log(`[SUBTITLE EXTRACTION] SRT fallback successful`);
                }
                catch (srtError) {
                    console.error(`[SUBTITLE EXTRACTION] SRT fallback failed:`, srtError);
                    console.log(`[SUBTITLE EXTRACTION] SRT error details:`, {
                        errorName: srtError instanceof Error ? srtError.name : 'Unknown',
                        errorMessage: srtError instanceof Error ? srtError.message : String(srtError)
                    });
                    // Try raw copy as last resort
                    console.log(`[SUBTITLE EXTRACTION] Trying raw copy...`);
                    try {
                        const fallbackFilename = outputFilename.replace(/\.[^/.]+$/, '.txt');
                        await ffmpegRef.current.exec([
                            '-i', FFMPEG_CONSTANTS.TEMP_FILES.INPUT,
                            '-map', `0:${activeStreamIndex}`,
                            '-c:s', 'copy',
                            fallbackFilename
                        ]);
                        outputFilename = fallbackFilename; // Update filename for download
                        console.log(`[SUBTITLE EXTRACTION] Raw copy successful`);
                    }
                    catch (copyError) {
                        console.error(`[SUBTITLE EXTRACTION] Raw copy also failed:`, copyError);
                        console.log(`[SUBTITLE EXTRACTION] Raw copy error details:`, {
                            errorName: copyError instanceof Error ? copyError.name : 'Unknown',
                            errorMessage: copyError instanceof Error ? copyError.message : String(copyError)
                        });
                        throw copyError; // Re-throw since all methods failed
                    }
                }
            }
            else {
                // Already tried copy, re-throw the error
                throw extractError;
            }
        }
        showProgress('Preparing download...', 90);
        // Read subtitle data
        const subtitleData = await ffmpegRef.current.readFile(outputFilename);
        const dataArray = subtitleData instanceof Uint8Array ? subtitleData : new Uint8Array(subtitleData);
        downloadBytes(dataArray, outputFilename);
        await cleanupFFmpegFiles();
        showProgress(`Subtitle extraction completed! (${formatFileSize(dataArray.length)})`, 100);
        // Auto-hide progress after 3 seconds
        setTimeout(hideProgress, 3000);
    }
    catch (error) {
        await cleanupFFmpegFiles();
        const errorMessage = error instanceof Error ? error.message : 'Failed to extract subtitle';
        showError(`Subtitle extraction failed: ${errorMessage}`);
        hideProgress();
    }
}, [showProgress, hideProgress, showError, cleanupFFmpegFiles, extractSubtitleFromMultipleChunks]);
// Extract all subtitles as ZIP
const extractAllSubtitles = useCallback(async (file) => {
console.log(`[EXTRACT ALL DEBUG] Starting extractAllSubtitles for: ${file.name}`);
console.log(`[EXTRACT ALL DEBUG] FFmpeg loaded: ${ffmpegRef.current.loaded}, Has metadata: ${!!metadata}`);
if (!ffmpegRef.current.loaded || !metadata) {
console.log(`[EXTRACT ALL DEBUG] Early return - FFmpeg loaded: ${ffmpegRef.current.loaded}, metadata: ${!!metadata}`);
return;
}
try {
showProgress('Preparing batch subtitle extraction...', 5);
const subtitleStreams = metadata.streams?.filter(stream => stream.codec_type === 'subtitle') || [];
console.log(`[EXTRACT ALL DEBUG] Found ${subtitleStreams.length} subtitle streams`);
if (subtitleStreams.length === 0) {
console.log(`[EXTRACT ALL DEBUG] No subtitle streams found, showing error`);
showError(ERROR_MESSAGES.SUBTITLE.NO_TRACKS);
return;
}
console.log(`[EXTRACT ALL DEBUG] Starting COMPLETE FFmpeg reset`);
const resetStartTime = performance.now();
// COMPLETE FFmpeg reset - terminate and reinitialize
try {
console.log(`[EXTRACT ALL DEBUG] Terminating FFmpeg instance...`);
await ffmpegRef.current.terminate();
console.log(`[EXTRACT ALL DEBUG] FFmpeg terminated successfully`);
}
catch (terminateError) {
console.log(`[EXTRACT ALL DEBUG] FFmpeg terminate error (expected):`, terminateError);
}
// Reinitialize FFmpeg completely fresh
console.log(`[EXTRACT ALL DEBUG] Reinitializing FFmpeg from scratch...`);
ffmpegRef.current = new FFmpeg();
const baseURL = 'https://unpkg.com/@ffmpeg/core@0.12.6/dist/esm';
await ffmpegRef.current.load({
coreURL: await cachedToBlobURL(`${baseURL}/ffmpeg-core.js`, 'text/javascript'),
wasmURL: await cachedToBlobURL(`${baseURL}/ffmpeg-core.wasm`, 'application/wasm'),
});
const resetEndTime = performance.now();
console.log(`[EXTRACT ALL DEBUG] FFmpeg reinitialized successfully in ${Math.round(resetEndTime - resetStartTime)}ms`);
// Always load the complete file using chunked reading for batch subtitle extraction
// Only metadata extraction uses header-only approach
console.log(`[BATCH SUBTITLE EXTRACTION] Loading complete file (${formatFileSize(file.size)}) using chunked reading`);
const fileProcessor = createFileProcessor({
chunkSize: PROCESSING_CONSTANTS.CHUNK_SIZES.COMPLETE_FILE,
debug: false
});
const fileData = await fileProcessor.processCompleteFile(file);
showProgress('Loading file for batch extraction...', 15);
await ffmpegRef.current.writeFile(FFMPEG_CONSTANTS.TEMP_FILES.INPUT, await fetchFile(fileData));
console.log(`[EXTRACT ALL DEBUG] File loaded into fresh FFmpeg instance: ${FFMPEG_CONSTANTS.TEMP_FILES.INPUT}`);
const zip = new JSZip();
const extractedFiles = [];
const usedFilenames = new Set();
// Extract each subtitle stream
for (let i = 0; i < subtitleStreams.length; i++) {
const stream = subtitleStreams[i];
const streamIndex = stream.index !== undefined ? stream.index : i;
const progress = Math.round(20 + (i / subtitleStreams.length) * 70);
showProgress(`Extracting subtitle ${i + 1}/${subtitleStreams.length} (${stream.language || 'unknown'})...`, progress);
try {
// Log stream details for debugging
console.log(`[BATCH SUBTITLE EXTRACTION] Processing stream ${i + 1}/${subtitleStreams.length}:`, {
streamIndex,
realIndex: stream.index,
language: stream.language,
codecName: stream.codec_name,
forced: stream.forced
});
// Generate a filename for text-based subtitles; on a collision, fall back to a name keyed by stream index
const generated = generateSubtitleFilename(file.name, stream.language, stream.forced, stream.codec_name);
let outputFilename = generated.filename;
const outputFormat = outputFilename.endsWith('.vtt') ? 'webvtt' : 'srt';
if (usedFilenames.has(outputFilename)) {
const nameWithoutExt = file.name.replace(/\.[^/.]+$/, '');
const ext = outputFilename.split('.').pop();
outputFilename = `${nameWithoutExt}.${streamIndex}.${ext}`;
}
usedFilenames.add(outputFilename);
// Extract subtitle using the complete file (already loaded)
console.log(`[BATCH SUBTITLE EXTRACTION] Extracting stream ${streamIndex} from complete file using format: ${outputFormat}`);
// Extract subtitle using fresh FFmpeg instance (stream indices should be accurate now)
try {
await ffmpegRef.current.exec([
'-i', FFMPEG_CONSTANTS.TEMP_FILES.INPUT,
'-map', `0:${streamIndex}`,
'-c:s', outputFormat,
outputFilename
]);
console.log(`[BATCH SUBTITLE EXTRACTION] Stream ${streamIndex} extracted successfully with format: ${outputFormat}`);
}
catch (extractError) {
console.log(`[BATCH SUBTITLE EXTRACTION] Extraction with ${outputFormat} failed for stream ${streamIndex}, trying fallback...`);
// Try SRT fallback for text formats
try {
const srtFilename = outputFilename.replace(/\.[^/.]+$/, '.srt');
await ffmpegRef.current.exec([
'-i', FFMPEG_CONSTANTS.TEMP_FILES.INPUT,
'-map', `0:${streamIndex}`,
'-c:s', 'srt',
srtFilename
]);
outputFilename = srtFilename;
console.log(`[BATCH SUBTITLE EXTRACTION] SRT fallback successful for stream ${streamIndex}`);
}