UNPKG

@opensubtitles/video-metadata-extractor

Version:

A comprehensive NPM package for video metadata extraction and subtitle processing using FFmpeg WASM. Supports metadata extraction, individual subtitle extraction, batch subtitle extraction with ZIP downloads, and memory-safe processing of files of any size.

260 lines 10.3 kB
/**
 * File processing utilities with memory-safe chunked operations.
 * Centralizes file handling logic to eliminate duplication.
 */
import { PROCESSING_CONSTANTS, UI_CONSTANTS } from '../constants/index.js';
import { createProcessingStats, updateProcessingStats, sleep } from './common.js';

/**
 * File processor class for handling large files with chunked processing.
 */
export class FileProcessor {
  /**
   * @param {boolean} [debug=false] - When true, progress is logged to the console.
   */
  constructor(debug = false) {
    this.debug = debug;
    this.stats = createProcessingStats(0);
  }

  /**
   * Process a file in fixed-size chunks with memory-safe streaming.
   *
   * @param {Blob|File} file - Source file; read via `slice`, never loaded whole.
   * @param {(chunk: Blob, chunkIndex: number, totalChunks: number) => Promise<*>} chunkProcessor
   *        Called once per chunk; its resolved value is collected into the result array.
   * @param {{ chunkSize?: number, onProgress?: (pct: number, msg: string) => void }} [options]
   * @returns {Promise<Array<*>>} One entry per chunk, in order.
   * @throws {Error} Wraps any chunk failure with the 1-based chunk number.
   */
  async processInChunks(file, chunkProcessor, options = {}) {
    const chunkSize = options.chunkSize || PROCESSING_CONSTANTS.CHUNK_SIZES.COMPLETE_FILE;
    const fileSize = file.size;
    const totalChunks = Math.ceil(fileSize / chunkSize);
    this.stats = createProcessingStats(fileSize);

    if (this.debug) {
      console.log(`[FileProcessor] Processing ${Math.round(fileSize / 1024 / 1024)}MB file in ${totalChunks} chunks`);
    }

    const results = [];
    for (let offset = 0; offset < fileSize; offset += chunkSize) {
      const end = Math.min(offset + chunkSize, fileSize);
      const chunk = file.slice(offset, end);
      const chunkIndex = Math.floor(offset / chunkSize);
      const progress = Math.round(((chunkIndex + 1) / totalChunks) * 100);
      const chunkSizeMB = Math.round((end - offset) / 1024 / 1024);

      if (options.onProgress) {
        options.onProgress(progress, `Processing chunk ${chunkIndex + 1}/${totalChunks} (${chunkSizeMB}MB)...`);
      }

      try {
        const result = await chunkProcessor(chunk, chunkIndex, totalChunks);
        results.push(result);
        this.stats = updateProcessingStats(this.stats, chunkIndex + 1);
        if (this.debug) {
          console.log(`[FileProcessor] Completed chunk ${chunkIndex + 1}/${totalChunks}`);
        }
        // Add small delay to prevent UI blocking for large files
        if (chunkIndex % UI_CONSTANTS.BATCH_PROCESSING.PROGRESS_UPDATE_FREQUENCY === 0) {
          await sleep(UI_CONSTANTS.DELAYS.CHUNK_UI_DELAY);
        }
      } catch (error) {
        throw new Error(`Failed to process chunk ${chunkIndex + 1}: ${error instanceof Error ? error.message : 'Unknown error'}`);
      }
    }
    return results;
  }

  /**
   * Create complete file data using chunked streaming.
   * Memory-safe approach for files of any size: small files are returned
   * as-is; larger files are re-assembled from streamed chunks into one Blob.
   *
   * @param {Blob|File} file
   * @param {(pct: number, msg: string) => void} [onProgress]
   * @returns {Promise<Blob|File>}
   */
  async createCompleteFileData(file, onProgress) {
    const fileSize = file.size;

    // For small files, return as-is
    if (fileSize < PROCESSING_CONSTANTS.CHUNK_SIZES.SMALL_FILE_THRESHOLD) {
      if (this.debug) {
        console.log(`[FileProcessor] Small file (${Math.round(fileSize / 1024 / 1024)}MB), returning directly`);
      }
      return file;
    }

    if (this.debug) {
      console.log(`[FileProcessor] Creating complete file data using chunked strategy`);
    }

    const chunks = await this.processInChunks(file, async (chunk) => chunk, { onProgress });

    // Combine all chunks into a single blob
    const completeFile = new Blob(chunks, { type: file.type });
    if (this.debug) {
      console.log(`[FileProcessor] Combined ${chunks.length} chunks into complete file blob`);
    }
    return completeFile;
  }

  /**
   * Progressive extraction for very large files: samples chunks at strategic
   * positions spread across the file and returns the first non-null result.
   *
   * @param {Blob|File} file
   * @param {(chunk: Blob, position: number) => Promise<*>} extractor
   *        Returns null when the chunk does not contain the desired content.
   * @param {{ chunkSize?: number, maxChunks?: number, onProgress?: (pct: number, msg: string) => void }} [options]
   * @returns {Promise<*>} First non-null extractor result, or null if none found.
   */
  async extractFromMultipleChunks(file, extractor, options = {}) {
    const fileSize = file.size;
    const chunkSize = options.chunkSize || PROCESSING_CONSTANTS.CHUNK_SIZES.QUICK_EXTRACTION;
    const maxChunks = options.maxChunks || PROCESSING_CONSTANTS.LIMITS.MAX_PROGRESSIVE_CHUNKS;

    // Calculate strategic chunk positions
    const totalChunks = Math.min(maxChunks, Math.ceil(fileSize / (500 * 1024 * 1024))); // One chunk per 500MB
    const chunkPositions = [];
    for (let i = 0; i < totalChunks; i++) {
      const position = Math.floor((fileSize / totalChunks) * i);
      chunkPositions.push(position);
    }

    if (this.debug) {
      console.log(`[FileProcessor] Progressive extraction with ${chunkPositions.length} strategic positions`);
    }

    // Try each chunk until we find the desired content
    for (let i = 0; i < chunkPositions.length; i++) {
      const position = chunkPositions[i];
      const chunkEnd = Math.min(position + chunkSize, fileSize);
      const chunk = file.slice(position, chunkEnd);

      if (options.onProgress) {
        // Progress is mapped into the 20%-80% band, leaving room for setup/teardown phases.
        const progress = 20 + (i / chunkPositions.length) * 60;
        options.onProgress(progress, `Scanning chunk ${i + 1}/${chunkPositions.length} at ${(position / 1024 / 1024 / 1024).toFixed(1)}GB...`);
      }

      try {
        const result = await extractor(chunk, position);
        if (result !== null) {
          if (this.debug) {
            console.log(`[FileProcessor] Found content in chunk ${i + 1} at position ${position}`);
          }
          return result;
        }
      } catch (error) {
        if (this.debug) {
          console.log(`[FileProcessor] Chunk ${i + 1} failed:`, error);
        }
        // Continue to next chunk
      }
    }

    if (this.debug) {
      console.log(`[FileProcessor] No content found in any of the ${chunkPositions.length} chunks`);
    }
    return null;
  }

  /**
   * Handle large file downloads, picking a strategy by size:
   * standard blob download below MAX_BLOB_SIZE, chunked streaming above it.
   *
   * @param {Uint8Array|ArrayBufferView} data - Bytes to download.
   * @param {string} filename - Suggested name for the saved file.
   * @param {(pct: number) => void} [onProgress] - Only used on the chunked path.
   */
  downloadLargeFile(data, filename, onProgress) {
    const maxBlobSize = PROCESSING_CONSTANTS.LIMITS.MAX_BLOB_SIZE;

    if (this.debug) {
      // Fixed: source had the corrupted literal `$(unknown)` instead of interpolating the filename.
      console.log(`[FileProcessor] Starting download for ${filename}, size: ${data.length} bytes`);
    }

    // For normal-sized files, use standard blob download
    if (data.length < maxBlobSize) {
      this._standardBlobDownload(data, filename);
      return;
    }

    // For large files, use chunked streaming download
    this._chunkedStreamDownload(data, filename, onProgress);
  }

  /**
   * Standard blob download for normal-sized files via a temporary anchor element.
   */
  _standardBlobDownload(data, filename) {
    const blob = new Blob([data], { type: 'application/octet-stream' });
    const url = URL.createObjectURL(blob);

    const a = document.createElement('a');
    a.href = url;
    a.download = filename;
    document.body.appendChild(a);
    a.click();
    document.body.removeChild(a);
    URL.revokeObjectURL(url);

    if (this.debug) {
      console.log(`[FileProcessor] Standard blob download completed for ${filename}`);
    }
  }

  /**
   * Chunked stream download for large files: feeds `data` into a
   * ReadableStream in slices so progress can be reported while the
   * final Blob is assembled.
   */
  _chunkedStreamDownload(data, filename, onProgress) {
    const chunkSize = PROCESSING_CONSTANTS.CHUNK_SIZES.DOWNLOAD;

    if (this.debug) {
      console.log(`[FileProcessor] Using chunked stream download for ${filename}`);
    }

    const stream = new ReadableStream({
      start(controller) {
        let offset = 0;
        const pump = () => {
          if (offset < data.length) {
            const chunk = data.slice(offset, Math.min(offset + chunkSize, data.length));
            controller.enqueue(chunk);
            offset += chunk.length;

            // Report progress
            if (onProgress) {
              onProgress((offset / data.length) * 100);
            }

            // Continue with next chunk on a macrotask to keep the UI responsive
            setTimeout(pump, 0);
          } else {
            controller.close();
          }
        };
        pump();
      }
    });

    // Create response from stream and trigger download
    const response = new Response(stream);
    response.blob()
      .then(blob => {
        const url = URL.createObjectURL(blob);
        const a = document.createElement('a');
        a.href = url;
        a.download = filename;
        document.body.appendChild(a);
        a.click();
        document.body.removeChild(a);
        URL.revokeObjectURL(url);

        if (this.debug) {
          console.log(`[FileProcessor] Chunked stream download completed for ${filename}`);
        }
      })
      .catch(error => {
        console.error(`[FileProcessor] Chunked download failed:`, error);
        // NOTE(review): rethrowing inside a floating .catch() surfaces as an
        // unhandled promise rejection — callers cannot catch it. Consider
        // returning the promise from this method instead.
        throw error;
      });
  }

  /**
   * Get a defensive copy of the current processing statistics.
   * @returns {Object}
   */
  getStats() {
    return { ...this.stats };
  }

  /**
   * Reset processor state to a zero-byte stats baseline.
   */
  reset() {
    this.stats = createProcessingStats(0);
  }
}

/**
 * Utility functions for file operations
 */

/**
 * Check if file is considered large.
 * Fixed: the source compared against VERY_LARGE_FILE_THRESHOLD here (and
 * LARGE_FILE_THRESHOLD in isVeryLargeFile) — the two constants were swapped,
 * inverting both predicates relative to getRecommendedProcessingStrategy.
 *
 * @param {Blob|File} file
 * @returns {boolean}
 */
export function isLargeFile(file) {
  return file.size > PROCESSING_CONSTANTS.LIMITS.LARGE_FILE_THRESHOLD;
}

/**
 * Check if file is considered very large.
 *
 * @param {Blob|File} file
 * @returns {boolean}
 */
export function isVeryLargeFile(file) {
  return file.size > PROCESSING_CONSTANTS.LIMITS.VERY_LARGE_FILE_THRESHOLD;
}

/**
 * Get recommended processing strategy based on file size.
 *
 * @param {Blob|File} file
 * @returns {'quick'|'chunked'|'progressive'}
 */
export function getRecommendedProcessingStrategy(file) {
  if (file.size < PROCESSING_CONSTANTS.CHUNK_SIZES.SMALL_FILE_THRESHOLD) {
    return 'quick';
  } else if (file.size < PROCESSING_CONSTANTS.LIMITS.LARGE_FILE_THRESHOLD) {
    return 'chunked';
  } else {
    return 'progressive';
  }
}

/**
 * Create a file processor instance with options.
 *
 * @param {{ debug?: boolean }} [options]
 * @returns {{ processCompleteFile: (file: Blob|File) => Promise<Blob|File> }}
 */
export function createFileProcessor(options = {}) {
  const processor = new FileProcessor(options.debug || false);
  return {
    processCompleteFile: (file) => processor.createCompleteFileData(file)
  };
}
//# sourceMappingURL=fileProcessor.js.map