UNPKG

@juspay/neurolink

Version:

Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applicatio

1,135 lines (1,134 loc) 47.2 kB
/** * Video Processor * * Handles downloading, validating, and processing video files for AI consumption. * Since LLMs cannot process raw video, this processor extracts: * - Structured metadata (duration, resolution, codecs, etc.) * - Keyframes at configurable intervals (resized to 768px JPEG) * - Embedded subtitle tracks (if present) * * The extracted content is formatted as text + images that can be sent to any * AI provider for analysis. * * Uses mediabunny (pure TypeScript) for metadata extraction, with fluent-ffmpeg * as a fallback for unsupported formats. Requires ffmpeg for keyframe/subtitle * extraction (via ffmpeg-static or system PATH). * * Key features: * - Adaptive keyframe extraction intervals based on video duration * - Frame count capping (max 20 frames) to control token usage * - JPEG quality optimization for AI vision models * - Embedded subtitle extraction (SRT format) * - Graceful degradation on corrupt files or missing codecs * - Temp file cleanup with finally blocks * - Configurable timeouts for ffmpeg and ffprobe operations * * @module processors/media/VideoProcessor * * @example * ```typescript * import { videoProcessor, processVideo, isVideoFile } from "./VideoProcessor.js"; * * // Check if a file is a video file * if (isVideoFile(fileInfo.mimetype, fileInfo.name)) { * const result = await processVideo(fileInfo, { * authHeaders: { Authorization: "Bearer token" }, * }); * * if (result.success) { * console.log(`Duration: ${result.data.metadata.durationFormatted}`); * console.log(`Keyframes: ${result.data.frameCount}`); * console.log(`Text for LLM:\n${result.data.textContent}`); * } * } * ``` */ import { randomUUID } from "crypto"; import { createWriteStream, existsSync, promises as fs } from "fs"; import { tmpdir } from "os"; import { join } from "path"; import { Readable } from "stream"; import { pipeline } from "stream/promises"; import { BaseFileProcessor } from "../base/BaseFileProcessor.js"; import { SIZE_LIMITS_MB } from 
"../config/index.js"; import { FileErrorCode } from "../errors/index.js"; import { tracers, ATTR, withSpan } from "../../telemetry/index.js"; import { logger } from "../../utils/logger.js"; // fluent-ffmpeg's default export is callable + has static methods — avoid caching // the module type (it confuses TS); Node's module cache handles dedup. async function loadFluentFfmpeg() { try { const mod = await import(/* @vite-ignore */ "fluent-ffmpeg"); return mod.default; } catch (err) { const e = err instanceof Error ? err : null; if (e?.code === "ERR_MODULE_NOT_FOUND" && e.message.includes("fluent-ffmpeg")) { throw new Error('Video processing requires the "fluent-ffmpeg" package. Install it with:\n pnpm add fluent-ffmpeg', { cause: err }); } throw err; } } let _mediabunny = null; async function loadMediaBunny() { if (_mediabunny) { return _mediabunny; } try { _mediabunny = await import(/* @vite-ignore */ "mediabunny"); return _mediabunny; } catch (err) { const e = err instanceof Error ? err : null; if (e?.code === "ERR_MODULE_NOT_FOUND" && e.message.includes("mediabunny")) { throw new Error('Video processing requires the "mediabunny" package. Install it with:\n pnpm add mediabunny', { cause: err }); } throw err; } } // ============================================================================= // FFMPEG PATH INITIALIZATION // ============================================================================= /** * Whether ffmpeg/ffprobe paths have been initialized. * We only attempt path resolution once to avoid repeated dynamic import overhead. */ let ffmpegPathInitialized = false; /** * Initialize ffmpeg binary paths. * Tries ffmpeg-static first, falls back to system binary in PATH. * * Note: ffprobe-static has been removed. Metadata probing now uses mediabunny * (pure TypeScript) as the primary method, with ffprobe as a fallback only when * mediabunny cannot handle the format (e.g., AVI, FLV). 
* * This is called lazily on the first processFile() invocation so that the module * can be imported without side effects. */ async function initFfmpegPaths() { if (ffmpegPathInitialized) { return; } ffmpegPathInitialized = true; // Try ffmpeg-static first, fall back to system ffmpeg. // IMPORTANT: Verify the binary actually exists before setting the path. // On some platforms (e.g., macOS ARM), ffmpeg-static installs the npm package // but the pre-built binary download fails silently, leaving a non-existent path. // If we set a bad path, ffmpeg commands fail with ENOENT instead of using // the perfectly good system ffmpeg in PATH. try { const ffmpegStatic = await import("ffmpeg-static"); const ffmpegPath = ffmpegStatic.default; if (typeof ffmpegPath === "string" && existsSync(ffmpegPath)) { const ff = await loadFluentFfmpeg(); ff.setFfmpegPath(ffmpegPath); } } catch { // Use system ffmpeg (already in PATH) } } // ============================================================================= // TYPES // ============================================================================= // ============================================================================= // CONSTANTS // ============================================================================= /** * Video processing configuration constants. * Controls frame extraction behavior, quality, and timeout limits. */ const VIDEO_CONFIG = { /** Maximum number of keyframes to extract from a video */ MAX_FRAMES: 100, /** * Frame extraction intervals based on video duration. * Shorter videos get more frequent frames; longer videos use wider intervals. 
*/ FRAME_INTERVALS: [ { maxDuration: 10, intervalSec: 1 }, // 10s → up to 10 frames { maxDuration: 30, intervalSec: 2 }, // 30s → up to 15 frames { maxDuration: 120, intervalSec: 3 }, // 2min → up to 40 frames { maxDuration: 600, intervalSec: 6 }, // 10min → up to 100 frames { maxDuration: 1800, intervalSec: 20 }, // 30min → up to 90 frames { maxDuration: Infinity, intervalSec: 60 }, // >30min → adaptive kicks in ], /** Maximum dimension (width or height) for extracted keyframes in pixels */ FRAME_MAX_DIMENSION: 768, /** JPEG quality for extracted keyframes (0-100) */ FRAME_JPEG_QUALITY: 80, /** Timeout for ffmpeg frame extraction / subtitle extraction in milliseconds */ FFMPEG_TIMEOUT_MS: 120_000, /** Timeout for ffprobe metadata extraction in milliseconds */ FFPROBE_TIMEOUT_MS: 10_000, }; /** Supported video MIME types */ const SUPPORTED_VIDEO_MIME_TYPES = [ "video/mp4", "video/x-matroska", "video/quicktime", "video/webm", "video/x-msvideo", "video/x-ms-wmv", "video/x-flv", "video/3gpp", "video/3gpp2", "video/MP2T", "video/ogg", ]; /** Supported video file extensions */ const SUPPORTED_VIDEO_EXTENSIONS = [ ".mp4", ".m4v", ".mkv", ".mov", ".avi", ".wmv", ".flv", ".webm", ".3gp", ".3g2", ".ts", ".mts", ".m2ts", ".ogv", ".vob", ]; /** * Maximum video file size in MB. * Uses VIDEO_MAX_MB (500 MB) to support long meeting recordings and screen captures. */ const VIDEO_MAX_SIZE_MB = SIZE_LIMITS_MB.VIDEO_MAX_MB; /** Default timeout for video download (2 minutes for larger files) */ const VIDEO_DOWNLOAD_TIMEOUT_MS = 120_000; // ============================================================================= // VIDEO PROCESSOR CLASS // ============================================================================= /** * Video Processor - extracts metadata, keyframes, and subtitles from video files. * * Since LLMs cannot process raw video, this processor converts videos into * a structured representation consisting of: * 1. 
Text metadata block (duration, resolution, codecs, etc.)
 * 2. Keyframe images (JPEG, resized to 768px max dimension)
 * 3. Subtitle text (if embedded in the video)
 *
 * The processor uses a temp file approach because ffmpeg requires file paths
 * for most operations. Temp files are always cleaned up in finally blocks.
 *
 * @example
 * ```typescript
 * const processor = new VideoProcessor();
 * const result = await processor.processFile({
 *   id: "video-1",
 *   name: "presentation.mp4",
 *   mimetype: "video/mp4",
 *   size: 15_000_000,
 *   buffer: videoBuffer,
 * });
 *
 * if (result.success) {
 *   // result.data.textContent - text description for LLM
 *   // result.data.keyframes - array of JPEG buffers
 *   // result.data.subtitleText - extracted subtitles (if any)
 * }
 * ```
 */
export class VideoProcessor extends BaseFileProcessor {
    /**
     * Configure the base processor with the video size limit, download timeout,
     * and supported MIME types / extensions declared at module level.
     * The arrays are spread into fresh copies so the module-level lists are not
     * handed to the base class by reference.
     */
    constructor() {
        super({
            maxSizeMB: VIDEO_MAX_SIZE_MB,
            timeoutMs: VIDEO_DOWNLOAD_TIMEOUT_MS,
            supportedMimeTypes: [...SUPPORTED_VIDEO_MIME_TYPES],
            supportedExtensions: [...SUPPORTED_VIDEO_EXTENSIONS],
            fileTypeName: "video",
            defaultFilename: "video.mp4",
        });
    }
    // ===========================================================================
    // ABSTRACT METHOD IMPLEMENTATION
    // ===========================================================================
    /**
     * Build processed result stub.
     * This is a synchronous placeholder - actual processing happens in the
     * overridden processFile method since ffmpeg operations are asynchronous
     * and require temp file I/O.
* * @param buffer - Downloaded file content * @param fileInfo - Original file information * @returns Empty ProcessedVideo structure */ buildProcessedResult(buffer, fileInfo) { return { buffer, mimetype: fileInfo.mimetype || "video/mp4", size: fileInfo.size, filename: this.getFilename(fileInfo), textContent: "", keyframes: [], metadata: { duration: 0, durationFormatted: "0s", width: 0, height: 0, codec: "unknown", fps: 0, bitrate: 0, subtitleTracks: 0, fileSize: fileInfo.size, }, hasKeyframes: false, frameCount: 0, }; } // =========================================================================== // MAIN PROCESSING OVERRIDE // =========================================================================== /** * Override processFile for async video processing with ffmpeg. * * Processing pipeline: * 1. Validate file type and size * 2. Get buffer (from fileInfo.buffer or download from URL) * 3. Write buffer to temp file (ffmpeg requires file paths) * 4. Extract metadata using ffprobe * 5. Extract keyframes at calculated intervals, resize with sharp * 6. Extract subtitle tracks if embedded * 7. Build textContent summary for LLM * 8. 
Clean up temp files * * @param fileInfo - File information with URL or buffer * @param options - Optional processing options * @returns Processing result with extracted video data or error */ async processFile(fileInfo, options) { const filename = this.getFilename(fileInfo); const sizeBytes = fileInfo.size || fileInfo.buffer?.length || 0; return withSpan({ name: "neurolink.file.video.process", tracer: tracers.file, attributes: { [ATTR.FILE_NAME]: filename, [ATTR.FILE_MIMETYPE]: fileInfo.mimetype || "video/mp4", [ATTR.FILE_SIZE_BYTES]: sizeBytes, }, }, async (span) => { logger.info(`[NEUROLINK] Video processing started: ${filename} (${(sizeBytes / (1024 * 1024)).toFixed(2)} MB, ${fileInfo.mimetype || "video/mp4"})`); // Ensure ffmpeg paths are initialized before any processing await initFfmpegPaths(); // Temp directory for this processing run const tempDir = join(tmpdir(), `neurolink-video-${randomUUID()}`); let tempCreated = false; try { // Step 1: Validate file type and size const validationResult = this.validateFileWithResult(fileInfo); if (!validationResult.success) { const validationErrMsg = validationResult.error?.message || "Validation failed"; span.setAttribute(ATTR.FILE_SUCCESS, false); span.setAttribute(ATTR.FILE_ERROR, validationErrMsg); logger.warn(`[NEUROLINK] Video skipped/failed: ${filename} — reason: ${validationErrMsg}`); return { success: false, error: validationResult.error }; } // Step 2: Get file buffer let buffer; if (fileInfo.buffer) { buffer = fileInfo.buffer; } else if (fileInfo.url) { const downloadResult = await this.downloadFileWithRetry(fileInfo, options); if (!downloadResult.success) { const downloadErrMsg = downloadResult.error?.message || "Download failed"; span.setAttribute(ATTR.FILE_SUCCESS, false); span.setAttribute(ATTR.FILE_ERROR, downloadErrMsg); logger.warn(`[NEUROLINK] Video skipped/failed: ${filename} — reason: ${downloadErrMsg}`); return { success: false, error: downloadResult.error }; } if (!downloadResult.data) { const 
errMsg = "Download succeeded but returned no data"; span.setAttribute(ATTR.FILE_SUCCESS, false); span.setAttribute(ATTR.FILE_ERROR, errMsg); logger.warn(`[NEUROLINK] Video skipped/failed: ${filename} — reason: ${errMsg}`); return { success: false, error: this.createError(FileErrorCode.DOWNLOAD_FAILED, { reason: errMsg, }), }; } buffer = downloadResult.data; // Validate actual downloaded size if (!this.validateFileSize(buffer.length)) { const errMsg = `File too large: ${(buffer.length / (1024 * 1024)).toFixed(2)} MB (max: ${this.config.maxSizeMB} MB)`; span.setAttribute(ATTR.FILE_SUCCESS, false); span.setAttribute(ATTR.FILE_ERROR, errMsg); logger.warn(`[NEUROLINK] Video skipped/failed: ${filename} — reason: ${errMsg}`); return { success: false, error: this.createError(FileErrorCode.FILE_TOO_LARGE, { sizeMB: (buffer.length / (1024 * 1024)).toFixed(2), maxMB: this.config.maxSizeMB, type: this.config.fileTypeName, }), }; } } else { const errMsg = "No buffer or URL provided for file"; span.setAttribute(ATTR.FILE_SUCCESS, false); span.setAttribute(ATTR.FILE_ERROR, errMsg); logger.warn(`[NEUROLINK] Video skipped/failed: ${filename} — reason: ${errMsg}`); return { success: false, error: this.createError(FileErrorCode.DOWNLOAD_FAILED, { reason: errMsg, }), }; } // Step 3: Write buffer to temp file (ffmpeg needs a file path) await fs.mkdir(tempDir, { recursive: true }); tempCreated = true; const extension = this.getExtensionFromFileInfo(fileInfo); const tempVideoPath = join(tempDir, `input${extension}`); await this.writeBufferToFile(buffer, tempVideoPath); // Step 4: Extract metadata — try mediabunny first (pure TS, no binary), // fall back to ffprobe for formats mediabunny doesn't support (AVI, FLV, WMV). 
let metadata; const mediabunnyResult = await this.probeVideoWithMediabunny(tempVideoPath); if (mediabunnyResult.success && mediabunnyResult.data) { metadata = { ...mediabunnyResult.data, fileSize: buffer.length }; } else { // Fall back to ffprobe (requires system ffprobe to be available) const probeResult = await this.probeVideo(tempVideoPath); if (probeResult.success && probeResult.data) { metadata = this.buildMetadata(probeResult.data, buffer.length); } } if (!metadata) { metadata = { duration: 0, durationFormatted: "unknown", width: 0, height: 0, codec: "unknown", fps: 0, bitrate: 0, subtitleTracks: 0, fileSize: buffer.length, }; } // Record video-specific metadata on span span.setAttribute(ATTR.VIDEO_DURATION_SEC, metadata.duration); span.setAttribute(ATTR.VIDEO_WIDTH, metadata.width); span.setAttribute(ATTR.VIDEO_HEIGHT, metadata.height); span.setAttribute(ATTR.VIDEO_CODEC, metadata.codec); span.setAttribute(ATTR.VIDEO_HAS_SUBTITLES, metadata.subtitleTracks > 0); // Step 5: Extract keyframes let keyframes = []; try { keyframes = await this.extractKeyframes(tempVideoPath, tempDir, metadata.duration); } catch { // Non-fatal: continue without keyframes if extraction fails // (e.g., audio-only file in a video container) logger.warn(`[NEUROLINK] Video keyframe extraction failed for ${filename}, continuing without keyframes`); } span.setAttribute(ATTR.VIDEO_KEYFRAMES_EXTRACTED, keyframes.length); // Step 6: Extract subtitles let subtitleText; if (metadata.subtitleTracks > 0) { try { subtitleText = await this.extractSubtitles(tempVideoPath, tempDir); } catch { // Non-fatal: continue without subtitles if extraction fails } } // Step 7: Build textContent for LLM const textContent = this.buildTextContent(metadata, keyframes.length, subtitleText, this.getFilename(fileInfo)); span.setAttribute(ATTR.VIDEO_TEXT_CONTENT_LENGTH, textContent.length); span.setAttribute(ATTR.FILE_OUTPUT_LENGTH, textContent.length); span.setAttribute(ATTR.FILE_SUCCESS, true); 
logger.info(`[NEUROLINK] Video processed: ${filename}${textContent.length} bytes text + ${keyframes.length} keyframes ` + `(${metadata.durationFormatted}, ${metadata.width}x${metadata.height}, ${metadata.codec})`); // Step 8: Return structured result return { success: true, data: { buffer, mimetype: fileInfo.mimetype || "video/mp4", size: fileInfo.size, filename: this.getFilename(fileInfo), textContent, keyframes, metadata, subtitleText, hasKeyframes: keyframes.length > 0, frameCount: keyframes.length, }, }; } catch (error) { const errMsg = error instanceof Error ? error.message : String(error); span.setAttribute(ATTR.FILE_SUCCESS, false); span.setAttribute(ATTR.FILE_ERROR, errMsg); logger.error(`[NEUROLINK] Video processing failed: ${filename}${errMsg}`); return { success: false, error: this.createError(FileErrorCode.PROCESSING_FAILED, { fileType: "video", error: errMsg, }, error instanceof Error ? error : undefined), }; } finally { // Step 8: Clean up temp files if (tempCreated) { await fs.rm(tempDir, { recursive: true, force: true }).catch(() => { // Ignore cleanup errors - temp files will be cleaned by OS eventually }); } } }); } // =========================================================================== // METADATA EXTRACTION // =========================================================================== /** * Probe a video file to extract metadata using ffprobe. 
* * @param filePath - Path to the video file * @returns Success result with probe data or error message */ async probeVideo(filePath) { const ffmpeg = await loadFluentFfmpeg(); return new Promise((resolve) => { const timeoutId = setTimeout(() => { resolve({ success: false, error: `ffprobe timed out after ${VIDEO_CONFIG.FFPROBE_TIMEOUT_MS}ms`, }); }, VIDEO_CONFIG.FFPROBE_TIMEOUT_MS); ffmpeg.ffprobe(filePath, (err, data) => { clearTimeout(timeoutId); if (err) { resolve({ success: false, error: `ffprobe failed: ${err.message}`, }); } else { resolve({ success: true, data }); } }); }); } /** * Probe a video file using mediabunny (pure TypeScript, no native binary). * Falls back to ffprobe if mediabunny fails or doesn't support the format. */ async probeVideoWithMediabunny(filePath) { const mb = await loadMediaBunny(); let input; try { input = new mb.Input({ source: new mb.FilePathSource(filePath), formats: [...mb.ALL_FORMATS], }); const duration = await input.computeDuration(); const videoTrack = await input.getPrimaryVideoTrack(); const audioTrack = await input.getPrimaryAudioTrack(); const allTracks = await input.getTracks(); const subtitleTracks = allTracks.filter((t) => !t.isVideoTrack() && !t.isAudioTrack()); // Get FPS from video track packet stats (sample a small number of packets) let fps = 0; if (videoTrack) { try { const stats = await videoTrack.computePacketStats(120); fps = Math.round(stats.averagePacketRate * 100) / 100; } catch { // FPS unavailable — non-fatal } } return { success: true, data: { duration: duration ?? 0, durationFormatted: this.formatDuration(duration ?? 0), width: videoTrack?.displayWidth ?? 0, height: videoTrack?.displayHeight ?? 0, codec: videoTrack?.codec ?? "unknown", fps, bitrate: 0, audioCodec: audioTrack?.codec ?? 
undefined, audioChannels: audioTrack?.numberOfChannels, audioSampleRate: audioTrack?.sampleRate, subtitleTracks: subtitleTracks.length, fileSize: 0, }, }; } catch (error) { return { success: false, error: `mediabunny failed: ${error instanceof Error ? error.message : String(error)}`, }; } finally { input?.dispose(); } } /** * Build a structured metadata object from ffprobe data. * * @param probeData - Raw ffprobe output * @param fileSize - Original file size in bytes * @returns Structured video metadata */ buildMetadata(probeData, fileSize) { const videoStream = probeData.streams.find((s) => s.codec_type === "video"); const audioStream = probeData.streams.find((s) => s.codec_type === "audio"); const subtitleStreams = probeData.streams.filter((s) => s.codec_type === "subtitle"); const duration = probeData.format?.duration ? parseFloat(String(probeData.format.duration)) : 0; // Parse FPS from r_frame_rate (e.g., "30000/1001" or "25/1") let fps = 0; if (videoStream?.r_frame_rate) { const parts = String(videoStream.r_frame_rate).split("/"); if (parts.length === 2) { const num = parseFloat(parts[0]); const den = parseFloat(parts[1]); if (den > 0) { fps = Math.round((num / den) * 100) / 100; } } else { fps = parseFloat(parts[0]) || 0; } } return { duration, durationFormatted: this.formatDuration(duration), width: videoStream?.width ?? 0, height: videoStream?.height ?? 0, codec: videoStream?.codec_name ?? "unknown", fps, bitrate: probeData.format?.bit_rate ? parseInt(String(probeData.format.bit_rate), 10) : 0, audioCodec: audioStream?.codec_name, audioChannels: audioStream?.channels, audioSampleRate: audioStream?.sample_rate ? 
parseInt(String(audioStream.sample_rate), 10) : undefined, subtitleTracks: subtitleStreams.length, fileSize, }; } // =========================================================================== // KEYFRAME EXTRACTION // =========================================================================== /** * Extract keyframes from a video at calculated intervals. * * The interval between frames is determined by the video duration: * - <= 10s: every 1s (very short clips — dense coverage) * - <= 30s: every 2s (short bug clips) * - <= 120s: every 5s (standard screen recordings) * - <= 600s: every 15s (longer demos) * - <= 1800s: every 60s (meeting recordings) * - > 1800s: every 180s (full meetings) * * Results are capped at MAX_FRAMES (100) and each frame is resized * to fit within 768x768px while maintaining aspect ratio. * The interval is adaptive: if the tier interval would exceed MAX_FRAMES, * the interval widens to duration/MAX_FRAMES for full-video coverage. * * @param videoPath - Path to the video file * @param tempDir - Temp directory for frame output * @param durationSec - Video duration in seconds * @returns Array of JPEG frame buffers */ async extractKeyframes(videoPath, tempDir, durationSec) { if (durationSec <= 0) { return []; } // Determine extraction interval based on duration const intervalSec = this.getFrameInterval(durationSec); // Calculate timestamps to extract const timestamps = []; for (let t = 0; t < durationSec && timestamps.length < VIDEO_CONFIG.MAX_FRAMES; t += intervalSec) { timestamps.push(t); } if (timestamps.length === 0) { // For very short videos, grab at least one frame at t=0 timestamps.push(0); } // Extract frames using ffmpeg const framesDir = join(tempDir, "frames"); await fs.mkdir(framesDir, { recursive: true }); await this.runFfmpegFrameExtraction(videoPath, framesDir, timestamps, intervalSec); // Read extracted frames and resize with sharp const keyframes = []; for (let i = 0; i < timestamps.length; i++) { const framePath = 
join(framesDir, `frame_${String(i + 1).padStart(4, "0")}.jpg`); try { await fs.access(framePath); const rawFrame = await fs.readFile(framePath); // Resize to fit within max dimension while preserving aspect ratio const sharp = (await import("sharp")).default; const resized = await sharp(rawFrame) .resize(VIDEO_CONFIG.FRAME_MAX_DIMENSION, VIDEO_CONFIG.FRAME_MAX_DIMENSION, { fit: "inside", withoutEnlargement: true, }) .jpeg({ quality: VIDEO_CONFIG.FRAME_JPEG_QUALITY }) .toBuffer(); keyframes.push(resized); } catch { // Skip individual frame on resize/encode failure } } return keyframes; } /** * Run ffmpeg to extract frames at specified timestamps. * * Uses the `-vf select` filter to pick frames at exact timestamps, * which is more efficient than seeking for each frame individually. * * @param videoPath - Path to the video file * @param outputDir - Directory to write frame files * @param timestamps - Array of timestamps in seconds */ async runFfmpegFrameExtraction(videoPath, outputDir, timestamps, intervalSec) { const ff = await loadFluentFfmpeg(); return new Promise((resolve, reject) => { // Improved select expression to pick exactly one frame per interval // instead of multiple frames within a 0.5s window. const selectExpr = `isnan(prev_selected_t)+gte(t-prev_selected_t,${intervalSec}-0.001)`; const timeoutId = setTimeout(() => { reject(new Error(`ffmpeg frame extraction timed out after ${VIDEO_CONFIG.FFMPEG_TIMEOUT_MS}ms`)); }, VIDEO_CONFIG.FFMPEG_TIMEOUT_MS); ff(videoPath) .outputOptions([ "-vf", `select='${selectExpr}',scale='min(${VIDEO_CONFIG.FRAME_MAX_DIMENSION}\\,iw):-2'`, "-vsync", "vfr", "-q:v", "3", "-frames:v", String(timestamps.length), ]) .output(join(outputDir, "frame_%04d.jpg")) .on("end", () => { clearTimeout(timeoutId); resolve(); }) .on("error", (err) => { clearTimeout(timeoutId); reject(err); }) .run(); }); } /** * Determine the frame extraction interval based on video duration. 
* * @param durationSec - Video duration in seconds * @returns Interval in seconds between extracted frames */ getFrameInterval(durationSec) { let intervalSec = 180; // fallback for (const tier of VIDEO_CONFIG.FRAME_INTERVALS) { if (durationSec <= tier.maxDuration) { intervalSec = tier.intervalSec; break; } } // Adaptive: if the tier interval would produce more frames than MAX_FRAMES, // widen the interval so frames are evenly distributed across the full video const estimatedFrames = Math.floor(durationSec / intervalSec); if (estimatedFrames > VIDEO_CONFIG.MAX_FRAMES) { intervalSec = Math.ceil(durationSec / VIDEO_CONFIG.MAX_FRAMES); } return intervalSec; } // =========================================================================== // SUBTITLE EXTRACTION // =========================================================================== /** * Extract embedded subtitle text from the first subtitle track. * * Uses ffmpeg to convert the first subtitle stream to SRT format, * then strips SRT formatting (timestamps, sequence numbers) to produce * plain text. 
* * @param videoPath - Path to the video file * @param tempDir - Temp directory for subtitle output * @returns Extracted subtitle text, or undefined if extraction fails */ async extractSubtitles(videoPath, tempDir) { const subtitlePath = join(tempDir, "subtitles.srt"); const ffSub = await loadFluentFfmpeg(); await new Promise((resolve, reject) => { const timeoutId = setTimeout(() => { reject(new Error(`ffmpeg subtitle extraction timed out after ${VIDEO_CONFIG.FFMPEG_TIMEOUT_MS}ms`)); }, VIDEO_CONFIG.FFMPEG_TIMEOUT_MS); ffSub(videoPath) .outputOptions(["-map", "0:s:0", "-c:s", "srt"]) .output(subtitlePath) .on("end", () => { clearTimeout(timeoutId); resolve(); }) .on("error", (err) => { clearTimeout(timeoutId); reject(err); }) .run(); }); try { const srtContent = await fs.readFile(subtitlePath, "utf-8"); return this.parseSrtToPlainText(srtContent); } catch { return undefined; } } /** * Parse SRT subtitle content into plain text. * Strips sequence numbers, timestamps, and blank lines. * * @param srt - Raw SRT content * @returns Plain text from subtitles */ parseSrtToPlainText(srt) { if (!srt.trim()) { return ""; } return srt .split("\n") .filter((line) => { const trimmed = line.trim(); // Skip empty lines if (!trimmed) { return false; } // Skip sequence numbers (pure digits) if (/^\d+$/.test(trimmed)) { return false; } // Skip timestamp lines (e.g., "00:01:23,456 --> 00:01:25,789") if (/^\d{2}:\d{2}:\d{2}[,.]\d{3}\s*-->/.test(trimmed)) { return false; } return true; }) .map((line) => line.trim()) .join("\n") .trim(); } // =========================================================================== // TEXT CONTENT BUILDER // =========================================================================== /** * Build a structured text description of the video for LLM consumption. * * The output includes: * - File name and basic info * - Technical metadata (resolution, codec, duration, etc.) 
* - Frame extraction summary * - Subtitle text (if available) * * @param metadata - Extracted video metadata * @param frameCount - Number of keyframes extracted * @param subtitleText - Extracted subtitle text (if any) * @param filename - Original filename * @returns Formatted text content for the LLM */ buildTextContent(metadata, frameCount, subtitleText, filename) { const lines = []; lines.push(`[Video File: ${filename}]`); lines.push(""); lines.push("## Video Metadata"); lines.push(`- Duration: ${metadata.durationFormatted}`); lines.push(`- Resolution: ${metadata.width}x${metadata.height}`); lines.push(`- Video Codec: ${metadata.codec}`); if (metadata.fps > 0) { lines.push(`- Frame Rate: ${metadata.fps} fps`); } if (metadata.bitrate > 0) { lines.push(`- Bitrate: ${(metadata.bitrate / 1000).toFixed(0)} kbps`); } if (metadata.audioCodec) { lines.push(`- Audio Codec: ${metadata.audioCodec}`); if (metadata.audioChannels) { lines.push(`- Audio Channels: ${metadata.audioChannels === 1 ? "mono" : metadata.audioChannels === 2 ? 
"stereo" : `${metadata.audioChannels}ch`}`); } if (metadata.audioSampleRate) { lines.push(`- Audio Sample Rate: ${(metadata.audioSampleRate / 1000).toFixed(1)} kHz`); } } lines.push(`- File Size: ${(metadata.fileSize / (1024 * 1024)).toFixed(1)} MB`); lines.push(""); if (frameCount > 0) { const intervalSec = this.getFrameInterval(metadata.duration); lines.push(`## Keyframes (${frameCount} frames extracted every ~${intervalSec}s)`); lines.push("The following images are keyframes extracted from the video at regular intervals."); } else { lines.push("## Keyframes"); lines.push("No keyframes could be extracted from this video (it may be audio-only or use an unsupported codec)."); } if (subtitleText) { lines.push(""); lines.push("## Subtitles / Captions"); // Truncate very long subtitle text to avoid blowing up context const maxSubtitleChars = 10_000; if (subtitleText.length > maxSubtitleChars) { lines.push(subtitleText.substring(0, maxSubtitleChars) + `\n... [truncated, ${subtitleText.length - maxSubtitleChars} more characters]`); } else { lines.push(subtitleText); } } return lines.join("\n"); } // =========================================================================== // UTILITY METHODS // =========================================================================== /** * Format a duration in seconds to a human-readable string. * * @param seconds - Duration in seconds * @returns Formatted string (e.g., "1h 23m 45s") */ formatDuration(seconds) { if (seconds <= 0) { return "0s"; } const hours = Math.floor(seconds / 3600); const minutes = Math.floor((seconds % 3600) / 60); const secs = Math.floor(seconds % 60); const parts = []; if (hours > 0) { parts.push(`${hours}h`); } if (minutes > 0) { parts.push(`${minutes}m`); } if (secs > 0 || parts.length === 0) { parts.push(`${secs}s`); } return parts.join(" "); } /** * Get a file extension from FileInfo, falling back to ".mp4". 
* * @param fileInfo - File information * @returns File extension with leading dot */ getExtensionFromFileInfo(fileInfo) { const name = fileInfo.name || ""; const dotIndex = name.lastIndexOf("."); if (dotIndex >= 0) { return name.substring(dotIndex).toLowerCase(); } // Fallback: derive from MIME type const mimeExtMap = { "video/mp4": ".mp4", "video/x-matroska": ".mkv", "video/quicktime": ".mov", "video/webm": ".webm", "video/x-msvideo": ".avi", "video/x-ms-wmv": ".wmv", "video/x-flv": ".flv", "video/3gpp": ".3gp", "video/3gpp2": ".3g2", "video/MP2T": ".ts", "video/ogg": ".ogv", }; return mimeExtMap[fileInfo.mimetype] || ".mp4"; } /** * Write a buffer to a file using streaming to handle large files efficiently. * * @param buffer - Buffer to write * @param filePath - Destination file path */ async writeBufferToFile(buffer, filePath) { const readable = Readable.from(buffer); const writable = createWriteStream(filePath); await pipeline(readable, writable); } // =========================================================================== // TARGETED EXTRACTION API // =========================================================================== /** * Extract frames from a specific time range in a video. * * This is the on-demand extraction method called by the `extract_file_content` * tool. Unlike initial keyframe extraction (which covers the full video), * this targets a specific time window with configurable frame count. 
* * @param buffer - Video file buffer * @param filename - Original filename (for extension detection) * @param startSec - Start time in seconds * @param endSec - End time in seconds * @param frameCount - Number of frames to extract in the range (default: 5) * @returns Array of JPEG frame buffers */ async extractFrameRange(buffer, filename, startSec, endSec, frameCount = 5) { await initFfmpegPaths(); const tempDir = join(tmpdir(), `neurolink-video-extract-${randomUUID()}`); try { await fs.mkdir(tempDir, { recursive: true }); // Write buffer to temp file const ext = this.guessExtensionFromName(filename); const tempVideoPath = join(tempDir, `input${ext}`); await this.writeBufferToFile(buffer, tempVideoPath); // Calculate evenly-spaced timestamps within the range const duration = endSec - startSec; if (duration <= 0) { return []; } const clampedCount = Math.min(frameCount, VIDEO_CONFIG.MAX_FRAMES); const timestamps = []; let interval = duration; if (clampedCount === 1) { timestamps.push(startSec); } else { interval = duration / (clampedCount - 1); for (let i = 0; i < clampedCount; i++) { timestamps.push(startSec + interval * i); } } // Extract frames const framesDir = join(tempDir, "frames"); await fs.mkdir(framesDir, { recursive: true }); await this.runFfmpegFrameExtraction(tempVideoPath, framesDir, timestamps, interval); // Read and resize frames const keyframes = []; for (let i = 0; i < timestamps.length; i++) { const framePath = join(framesDir, `frame_${String(i + 1).padStart(4, "0")}.jpg`); try { await fs.access(framePath); const rawFrame = await fs.readFile(framePath); const sharp = (await import("sharp")).default; const resized = await sharp(rawFrame) .resize(VIDEO_CONFIG.FRAME_MAX_DIMENSION, VIDEO_CONFIG.FRAME_MAX_DIMENSION, { fit: "inside", withoutEnlargement: true, }) .jpeg({ quality: VIDEO_CONFIG.FRAME_JPEG_QUALITY }) .toBuffer(); keyframes.push(resized); } catch { // Skip individual frame failures } } return keyframes; } finally { await fs.rm(tempDir, { 
recursive: true, force: true }).catch(() => { /* cleanup - ignore temp dir removal errors */ }); } } /** * Guess file extension from filename, with fallback to .mp4. */ guessExtensionFromName(filename) { const dotIndex = filename.lastIndexOf("."); if (dotIndex >= 0) { return filename.substring(dotIndex).toLowerCase(); } return ".mp4"; } } // ============================================================================= // SINGLETON INSTANCE // ============================================================================= /** * Singleton Video processor instance. * Use this for standard video processing operations. * * @example * ```typescript * import { videoProcessor } from "./VideoProcessor.js"; * * const result = await videoProcessor.processFile(fileInfo); * ``` */ export const videoProcessor = new VideoProcessor(); // ============================================================================= // HELPER FUNCTIONS // ============================================================================= /** * Check if a file is a video file. * Matches by MIME type or file extension. * * @param mimetype - MIME type of the file * @param filename - Filename (for extension-based detection) * @returns true if the file is a supported video file * * @example * ```typescript * if (isVideoFile("video/mp4", "recording.mp4")) { * const result = await processVideo(fileInfo); * } * * if (isVideoFile("", "clip.mkv")) { * // Also matches by extension * } * ``` */ export function isVideoFile(mimetype, filename) { return videoProcessor.isFileSupported(mimetype, filename); } /** * Process a single video file. * Convenience function that uses the singleton processor. 
* * @param fileInfo - File information (can include URL or buffer) * @param options - Optional processing options (auth headers, timeout, retry config) * @returns Processing result with extracted video data or error * * @example * ```typescript * import { processVideo } from "./VideoProcessor.js"; * * const result = await processVideo({ * id: "vid-123", * name: "demo.mp4", * mimetype: "video/mp4", * size: 15_000_000, * buffer: videoBuffer, * }); * * if (result.success) { * console.log(`Duration: ${result.data.metadata.durationFormatted}`); * console.log(`Extracted ${result.data.frameCount} keyframes`); * console.log(`Text content:\n${result.data.textContent}`); * * if (result.data.subtitleText) { * console.log(`Subtitles:\n${result.data.subtitleText}`); * } * } else { * console.error(`Processing failed: ${result.error?.userMessage}`); * } * ``` */ export async function processVideo(fileInfo, options) { return videoProcessor.processFile(fileInfo, options); }