@juspay/neurolink
Version:
Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applicatio
1,135 lines (1,134 loc) • 47.2 kB
JavaScript
/**
* Video Processor
*
* Handles downloading, validating, and processing video files for AI consumption.
* Since LLMs cannot process raw video, this processor extracts:
* - Structured metadata (duration, resolution, codecs, etc.)
* - Keyframes at configurable intervals (resized to 768px JPEG)
* - Embedded subtitle tracks (if present)
*
* The extracted content is formatted as text + images that can be sent to any
* AI provider for analysis.
*
* Uses mediabunny (pure TypeScript) for metadata extraction, with fluent-ffmpeg
* as a fallback for unsupported formats. Requires ffmpeg for keyframe/subtitle
* extraction (via ffmpeg-static or system PATH).
*
* Key features:
* - Adaptive keyframe extraction intervals based on video duration
* - Frame count capping (max 100 frames) to control token usage
* - JPEG quality optimization for AI vision models
* - Embedded subtitle extraction (SRT format)
* - Graceful degradation on corrupt files or missing codecs
* - Temp file cleanup with finally blocks
* - Configurable timeouts for ffmpeg and ffprobe operations
*
* @module processors/media/VideoProcessor
*
* @example
* ```typescript
* import { videoProcessor, processVideo, isVideoFile } from "./VideoProcessor.js";
*
* // Check if a file is a video file
* if (isVideoFile(fileInfo.mimetype, fileInfo.name)) {
* const result = await processVideo(fileInfo, {
* authHeaders: { Authorization: "Bearer token" },
* });
*
* if (result.success) {
* console.log(`Duration: ${result.data.metadata.durationFormatted}`);
* console.log(`Keyframes: ${result.data.frameCount}`);
* console.log(`Text for LLM:\n${result.data.textContent}`);
* }
* }
* ```
*/
import { randomUUID } from "crypto";
import { createWriteStream, existsSync, promises as fs } from "fs";
import { tmpdir } from "os";
import { join } from "path";
import { Readable } from "stream";
import { pipeline } from "stream/promises";
import { BaseFileProcessor } from "../base/BaseFileProcessor.js";
import { SIZE_LIMITS_MB } from "../config/index.js";
import { FileErrorCode } from "../errors/index.js";
import { tracers, ATTR, withSpan } from "../../telemetry/index.js";
import { logger } from "../../utils/logger.js";
// fluent-ffmpeg's default export is callable + has static methods — avoid caching
// the module type (it confuses TS); Node's module cache handles dedup.
/**
 * Dynamically import fluent-ffmpeg and return its default export.
 *
 * @returns The module's default export (the callable ffmpeg command factory).
 * @throws {Error} An install hint (with the original error as `cause`) when the
 *   package cannot be resolved; any other import error is rethrown unchanged.
 */
async function loadFluentFfmpeg() {
  let loaded;
  try {
    loaded = await import(/* @vite-ignore */ "fluent-ffmpeg");
  }
  catch (err) {
    // Only translate "package not installed" into a friendlier message.
    const isMissingPackage = err instanceof Error &&
      err.code === "ERR_MODULE_NOT_FOUND" &&
      err.message.includes("fluent-ffmpeg");
    if (!isMissingPackage) {
      throw err;
    }
    throw new Error('Video processing requires the "fluent-ffmpeg" package. Install it with:\n pnpm add fluent-ffmpeg', { cause: err });
  }
  return loaded.default;
}
// Cached mediabunny module namespace; null until the first successful import.
let _mediabunny = null;
/**
 * Dynamically import mediabunny, caching the module namespace after the first
 * successful load.
 *
 * @returns The mediabunny module namespace.
 * @throws {Error} An install hint (with the original error as `cause`) when the
 *   package cannot be resolved; any other import error is rethrown unchanged.
 */
async function loadMediaBunny() {
  if (!_mediabunny) {
    try {
      _mediabunny = await import(/* @vite-ignore */ "mediabunny");
    }
    catch (err) {
      const isMissingPackage = err instanceof Error &&
        err.code === "ERR_MODULE_NOT_FOUND" &&
        err.message.includes("mediabunny");
      if (isMissingPackage) {
        throw new Error('Video processing requires the "mediabunny" package. Install it with:\n pnpm add mediabunny', { cause: err });
      }
      throw err;
    }
  }
  return _mediabunny;
}
// =============================================================================
// FFMPEG PATH INITIALIZATION
// =============================================================================
/**
* Whether ffmpeg/ffprobe paths have been initialized.
* We only attempt path resolution once to avoid repeated dynamic import overhead.
*/
let ffmpegPathInitialized = false;
/**
 * The single in-flight (or finished) initialization run. Sharing it means
 * concurrent callers all wait for the same attempt instead of racing past it.
 */
let ffmpegPathInitPromise = null;
/**
 * Initialize ffmpeg binary paths.
 * Tries ffmpeg-static first, falls back to system binary in PATH.
 *
 * Note: ffprobe-static has been removed. Metadata probing now uses mediabunny
 * (pure TypeScript) as the primary method, with ffprobe as a fallback only when
 * mediabunny cannot handle the format (e.g., AVI, FLV).
 *
 * This is called lazily on the first processFile() invocation so that the module
 * can be imported without side effects.
 *
 * The `ffmpegPathInitialized` flag is only set once the attempt has finished,
 * so a caller arriving while the first attempt is still awaiting its dynamic
 * imports waits for it rather than running ffmpeg with an unconfigured path.
 *
 * @returns Resolves (never rejects) once path resolution has been attempted.
 */
async function initFfmpegPaths() {
  if (ffmpegPathInitialized) {
    return;
  }
  ffmpegPathInitPromise ??= (async () => {
    // Try ffmpeg-static first, fall back to system ffmpeg.
    // IMPORTANT: Verify the binary actually exists before setting the path.
    // On some platforms (e.g., macOS ARM), ffmpeg-static installs the npm package
    // but the pre-built binary download fails silently, leaving a non-existent path.
    // If we set a bad path, ffmpeg commands fail with ENOENT instead of using
    // the perfectly good system ffmpeg in PATH.
    try {
      const ffmpegStatic = await import("ffmpeg-static");
      const ffmpegPath = ffmpegStatic.default;
      if (typeof ffmpegPath === "string" && existsSync(ffmpegPath)) {
        const ff = await loadFluentFfmpeg();
        ff.setFfmpegPath(ffmpegPath);
      }
    }
    catch {
      // Use system ffmpeg (already in PATH)
    }
    finally {
      // Resolution is attempted exactly once, whether or not it succeeded.
      ffmpegPathInitialized = true;
    }
  })();
  await ffmpegPathInitPromise;
}
// =============================================================================
// TYPES
// =============================================================================
// =============================================================================
// CONSTANTS
// =============================================================================
/**
* Video processing configuration constants.
* Controls frame extraction behavior, quality, and timeout limits.
*/
const VIDEO_CONFIG = {
  /** Hard cap on the number of keyframes taken from one video */
  MAX_FRAMES: 100,
  /**
   * Duration tiers as [maxDuration, intervalSec] pairs, expanded into objects.
   * The first tier whose maxDuration covers the video supplies the interval,
   * so short clips are sampled densely and long recordings sparsely.
   */
  FRAME_INTERVALS: [
    [10, 1], // 10s → up to 10 frames
    [30, 2], // 30s → up to 15 frames
    [120, 3], // 2min → up to 40 frames
    [600, 6], // 10min → up to 100 frames
    [1800, 20], // 30min → up to 90 frames
    [Infinity, 60], // >30min → adaptive widening applies
  ].map(([maxDuration, intervalSec]) => ({ maxDuration, intervalSec })),
  /** Largest allowed width or height, in pixels, of an extracted keyframe */
  FRAME_MAX_DIMENSION: 768,
  /** JPEG quality (0-100) used when re-encoding keyframes */
  FRAME_JPEG_QUALITY: 80,
  /** Time budget in milliseconds for ffmpeg frame / subtitle extraction (2 minutes) */
  FFMPEG_TIMEOUT_MS: 2 * 60 * 1000,
  /** Time budget in milliseconds for ffprobe metadata extraction (10 seconds) */
  FFPROBE_TIMEOUT_MS: 10 * 1000,
};
/** Supported video MIME types */
const SUPPORTED_VIDEO_MIME_TYPES = [
  // MP4 / Matroska / QuickTime / WebM containers
  "video/mp4", "video/x-matroska", "video/quicktime", "video/webm",
  // AVI / WMV / FLV containers
  "video/x-msvideo", "video/x-ms-wmv", "video/x-flv",
  // 3GPP family
  "video/3gpp", "video/3gpp2",
  // MPEG transport stream and Ogg
  "video/MP2T", "video/ogg",
];
/** Supported video file extensions */
const SUPPORTED_VIDEO_EXTENSIONS = [
  // MP4 / Matroska / QuickTime
  ".mp4", ".m4v", ".mkv", ".mov",
  // AVI / WMV / FLV / WebM
  ".avi", ".wmv", ".flv", ".webm",
  // 3GPP family
  ".3gp", ".3g2",
  // MPEG transport stream variants
  ".ts", ".mts", ".m2ts",
  // Ogg video and DVD video objects
  ".ogv", ".vob",
];
/**
* Maximum video file size in MB.
* Uses VIDEO_MAX_MB (500 MB) to support long meeting recordings and screen captures.
*/
const { VIDEO_MAX_MB: VIDEO_MAX_SIZE_MB } = SIZE_LIMITS_MB;
/** Download timeout for video files: two minutes, expressed in milliseconds */
const VIDEO_DOWNLOAD_TIMEOUT_MS = 2 * 60 * 1000;
// =============================================================================
// VIDEO PROCESSOR CLASS
// =============================================================================
/**
* Video Processor - extracts metadata, keyframes, and subtitles from video files.
*
* Since LLMs cannot process raw video, this processor converts videos into
* a structured representation consisting of:
* 1. Text metadata block (duration, resolution, codecs, etc.)
* 2. Keyframe images (JPEG, resized to 768px max dimension)
* 3. Subtitle text (if embedded in the video)
*
* The processor uses a temp file approach because ffmpeg requires file paths
* for most operations. Temp files are always cleaned up in finally blocks.
*
* @example
* ```typescript
* const processor = new VideoProcessor();
* const result = await processor.processFile({
* id: "video-1",
* name: "presentation.mp4",
* mimetype: "video/mp4",
* size: 15_000_000,
* buffer: videoBuffer,
* });
*
* if (result.success) {
* // result.data.textContent - text description for LLM
* // result.data.keyframes - array of JPEG buffers
* // result.data.subtitleText - extracted subtitles (if any)
* }
* ```
*/
export class VideoProcessor extends BaseFileProcessor {
constructor() {
super({
maxSizeMB: VIDEO_MAX_SIZE_MB,
timeoutMs: VIDEO_DOWNLOAD_TIMEOUT_MS,
supportedMimeTypes: [...SUPPORTED_VIDEO_MIME_TYPES],
supportedExtensions: [...SUPPORTED_VIDEO_EXTENSIONS],
fileTypeName: "video",
defaultFilename: "video.mp4",
});
}
// ===========================================================================
// ABSTRACT METHOD IMPLEMENTATION
// ===========================================================================
/**
* Build processed result stub.
* This is a synchronous placeholder - actual processing happens in the
* overridden processFile method since ffmpeg operations are asynchronous
* and require temp file I/O.
*
* @param buffer - Downloaded file content
* @param fileInfo - Original file information
* @returns Empty ProcessedVideo structure
*/
buildProcessedResult(buffer, fileInfo) {
return {
buffer,
mimetype: fileInfo.mimetype || "video/mp4",
size: fileInfo.size,
filename: this.getFilename(fileInfo),
textContent: "",
keyframes: [],
metadata: {
duration: 0,
durationFormatted: "0s",
width: 0,
height: 0,
codec: "unknown",
fps: 0,
bitrate: 0,
subtitleTracks: 0,
fileSize: fileInfo.size,
},
hasKeyframes: false,
frameCount: 0,
};
}
// ===========================================================================
// MAIN PROCESSING OVERRIDE
// ===========================================================================
/**
* Override processFile for async video processing with ffmpeg.
*
* Processing pipeline:
* 1. Validate file type and size
* 2. Get buffer (from fileInfo.buffer or download from URL)
* 3. Write buffer to temp file (ffmpeg requires file paths)
* 4. Extract metadata using mediabunny, falling back to ffprobe
* 5. Extract keyframes at calculated intervals, resize with sharp
* 6. Extract subtitle tracks if embedded
* 7. Build textContent summary for LLM
* 8. Clean up temp files
*
* @param fileInfo - File information with URL or buffer
* @param options - Optional processing options
* @returns Processing result with extracted video data or error
*/
async processFile(fileInfo, options) {
const filename = this.getFilename(fileInfo);
const sizeBytes = fileInfo.size || fileInfo.buffer?.length || 0;
return withSpan({
name: "neurolink.file.video.process",
tracer: tracers.file,
attributes: {
[ATTR.FILE_NAME]: filename,
[ATTR.FILE_MIMETYPE]: fileInfo.mimetype || "video/mp4",
[ATTR.FILE_SIZE_BYTES]: sizeBytes,
},
}, async (span) => {
logger.info(`[NEUROLINK] Video processing started: ${filename} (${(sizeBytes / (1024 * 1024)).toFixed(2)} MB, ${fileInfo.mimetype || "video/mp4"})`);
// Ensure ffmpeg paths are initialized before any processing
await initFfmpegPaths();
// Temp directory for this processing run
const tempDir = join(tmpdir(), `neurolink-video-${randomUUID()}`);
let tempCreated = false;
try {
// Step 1: Validate file type and size
const validationResult = this.validateFileWithResult(fileInfo);
if (!validationResult.success) {
const validationErrMsg = validationResult.error?.message || "Validation failed";
span.setAttribute(ATTR.FILE_SUCCESS, false);
span.setAttribute(ATTR.FILE_ERROR, validationErrMsg);
logger.warn(`[NEUROLINK] Video skipped/failed: ${filename} — reason: ${validationErrMsg}`);
return { success: false, error: validationResult.error };
}
// Step 2: Get file buffer
let buffer;
if (fileInfo.buffer) {
buffer = fileInfo.buffer;
}
else if (fileInfo.url) {
const downloadResult = await this.downloadFileWithRetry(fileInfo, options);
if (!downloadResult.success) {
const downloadErrMsg = downloadResult.error?.message || "Download failed";
span.setAttribute(ATTR.FILE_SUCCESS, false);
span.setAttribute(ATTR.FILE_ERROR, downloadErrMsg);
logger.warn(`[NEUROLINK] Video skipped/failed: ${filename} — reason: ${downloadErrMsg}`);
return { success: false, error: downloadResult.error };
}
if (!downloadResult.data) {
const errMsg = "Download succeeded but returned no data";
span.setAttribute(ATTR.FILE_SUCCESS, false);
span.setAttribute(ATTR.FILE_ERROR, errMsg);
logger.warn(`[NEUROLINK] Video skipped/failed: ${filename} — reason: ${errMsg}`);
return {
success: false,
error: this.createError(FileErrorCode.DOWNLOAD_FAILED, {
reason: errMsg,
}),
};
}
buffer = downloadResult.data;
// Validate actual downloaded size
if (!this.validateFileSize(buffer.length)) {
const errMsg = `File too large: ${(buffer.length / (1024 * 1024)).toFixed(2)} MB (max: ${this.config.maxSizeMB} MB)`;
span.setAttribute(ATTR.FILE_SUCCESS, false);
span.setAttribute(ATTR.FILE_ERROR, errMsg);
logger.warn(`[NEUROLINK] Video skipped/failed: ${filename} — reason: ${errMsg}`);
return {
success: false,
error: this.createError(FileErrorCode.FILE_TOO_LARGE, {
sizeMB: (buffer.length / (1024 * 1024)).toFixed(2),
maxMB: this.config.maxSizeMB,
type: this.config.fileTypeName,
}),
};
}
}
else {
const errMsg = "No buffer or URL provided for file";
span.setAttribute(ATTR.FILE_SUCCESS, false);
span.setAttribute(ATTR.FILE_ERROR, errMsg);
logger.warn(`[NEUROLINK] Video skipped/failed: ${filename} — reason: ${errMsg}`);
return {
success: false,
error: this.createError(FileErrorCode.DOWNLOAD_FAILED, {
reason: errMsg,
}),
};
}
// Step 3: Write buffer to temp file (ffmpeg needs a file path)
await fs.mkdir(tempDir, { recursive: true });
tempCreated = true;
const extension = this.getExtensionFromFileInfo(fileInfo);
const tempVideoPath = join(tempDir, `input${extension}`);
await this.writeBufferToFile(buffer, tempVideoPath);
// Step 4: Extract metadata — try mediabunny first (pure TS, no binary),
// fall back to ffprobe for formats mediabunny doesn't support (AVI, FLV, WMV).
let metadata;
const mediabunnyResult = await this.probeVideoWithMediabunny(tempVideoPath);
if (mediabunnyResult.success && mediabunnyResult.data) {
metadata = { ...mediabunnyResult.data, fileSize: buffer.length };
}
else {
// Fall back to ffprobe (requires system ffprobe to be available)
const probeResult = await this.probeVideo(tempVideoPath);
if (probeResult.success && probeResult.data) {
metadata = this.buildMetadata(probeResult.data, buffer.length);
}
}
if (!metadata) {
metadata = {
duration: 0,
durationFormatted: "unknown",
width: 0,
height: 0,
codec: "unknown",
fps: 0,
bitrate: 0,
subtitleTracks: 0,
fileSize: buffer.length,
};
}
// Record video-specific metadata on span
span.setAttribute(ATTR.VIDEO_DURATION_SEC, metadata.duration);
span.setAttribute(ATTR.VIDEO_WIDTH, metadata.width);
span.setAttribute(ATTR.VIDEO_HEIGHT, metadata.height);
span.setAttribute(ATTR.VIDEO_CODEC, metadata.codec);
span.setAttribute(ATTR.VIDEO_HAS_SUBTITLES, metadata.subtitleTracks > 0);
// Step 5: Extract keyframes
let keyframes = [];
try {
keyframes = await this.extractKeyframes(tempVideoPath, tempDir, metadata.duration);
}
catch {
// Non-fatal: continue without keyframes if extraction fails
// (e.g., audio-only file in a video container)
logger.warn(`[NEUROLINK] Video keyframe extraction failed for ${filename}, continuing without keyframes`);
}
span.setAttribute(ATTR.VIDEO_KEYFRAMES_EXTRACTED, keyframes.length);
// Step 6: Extract subtitles
let subtitleText;
if (metadata.subtitleTracks > 0) {
try {
subtitleText = await this.extractSubtitles(tempVideoPath, tempDir);
}
catch {
// Non-fatal: continue without subtitles if extraction fails
}
}
// Step 7: Build textContent for LLM
const textContent = this.buildTextContent(metadata, keyframes.length, subtitleText, this.getFilename(fileInfo));
span.setAttribute(ATTR.VIDEO_TEXT_CONTENT_LENGTH, textContent.length);
span.setAttribute(ATTR.FILE_OUTPUT_LENGTH, textContent.length);
span.setAttribute(ATTR.FILE_SUCCESS, true);
logger.info(`[NEUROLINK] Video processed: ${filename} → ${textContent.length} bytes text + ${keyframes.length} keyframes ` +
`(${metadata.durationFormatted}, ${metadata.width}x${metadata.height}, ${metadata.codec})`);
// Step 8: Return structured result
return {
success: true,
data: {
buffer,
mimetype: fileInfo.mimetype || "video/mp4",
size: fileInfo.size,
filename: this.getFilename(fileInfo),
textContent,
keyframes,
metadata,
subtitleText,
hasKeyframes: keyframes.length > 0,
frameCount: keyframes.length,
},
};
}
catch (error) {
const errMsg = error instanceof Error ? error.message : String(error);
span.setAttribute(ATTR.FILE_SUCCESS, false);
span.setAttribute(ATTR.FILE_ERROR, errMsg);
logger.error(`[NEUROLINK] Video processing failed: ${filename} — ${errMsg}`);
return {
success: false,
error: this.createError(FileErrorCode.PROCESSING_FAILED, {
fileType: "video",
error: errMsg,
}, error instanceof Error ? error : undefined),
};
}
finally {
// Step 8: Clean up temp files
if (tempCreated) {
await fs.rm(tempDir, { recursive: true, force: true }).catch(() => {
// Ignore cleanup errors - temp files will be cleaned by OS eventually
});
}
}
});
}
// ===========================================================================
// METADATA EXTRACTION
// ===========================================================================
/**
* Probe a video file to extract metadata using ffprobe.
*
* @param filePath - Path to the video file
* @returns Success result with probe data or error message
*/
async probeVideo(filePath) {
const ffmpeg = await loadFluentFfmpeg();
return new Promise((resolve) => {
const timeoutId = setTimeout(() => {
resolve({
success: false,
error: `ffprobe timed out after ${VIDEO_CONFIG.FFPROBE_TIMEOUT_MS}ms`,
});
}, VIDEO_CONFIG.FFPROBE_TIMEOUT_MS);
ffmpeg.ffprobe(filePath, (err, data) => {
clearTimeout(timeoutId);
if (err) {
resolve({
success: false,
error: `ffprobe failed: ${err.message}`,
});
}
else {
resolve({ success: true, data });
}
});
});
}
/**
* Probe a video file using mediabunny (pure TypeScript, no native binary).
* Returns `{ success: false, error }` when mediabunny fails or doesn't support
* the format; the caller is responsible for falling back to ffprobe.
*/
async probeVideoWithMediabunny(filePath) {
const mb = await loadMediaBunny();
let input;
try {
input = new mb.Input({
source: new mb.FilePathSource(filePath),
formats: [...mb.ALL_FORMATS],
});
const duration = await input.computeDuration();
const videoTrack = await input.getPrimaryVideoTrack();
const audioTrack = await input.getPrimaryAudioTrack();
const allTracks = await input.getTracks();
const subtitleTracks = allTracks.filter((t) => !t.isVideoTrack() && !t.isAudioTrack());
// Get FPS from video track packet stats (sample a small number of packets)
let fps = 0;
if (videoTrack) {
try {
const stats = await videoTrack.computePacketStats(120);
fps = Math.round(stats.averagePacketRate * 100) / 100;
}
catch {
// FPS unavailable — non-fatal
}
}
return {
success: true,
data: {
duration: duration ?? 0,
durationFormatted: this.formatDuration(duration ?? 0),
width: videoTrack?.displayWidth ?? 0,
height: videoTrack?.displayHeight ?? 0,
codec: videoTrack?.codec ?? "unknown",
fps,
bitrate: 0,
audioCodec: audioTrack?.codec ?? undefined,
audioChannels: audioTrack?.numberOfChannels,
audioSampleRate: audioTrack?.sampleRate,
subtitleTracks: subtitleTracks.length,
fileSize: 0,
},
};
}
catch (error) {
return {
success: false,
error: `mediabunny failed: ${error instanceof Error ? error.message : String(error)}`,
};
}
finally {
input?.dispose();
}
}
/**
* Build a structured metadata object from ffprobe data.
*
* @param probeData - Raw ffprobe output
* @param fileSize - Original file size in bytes
* @returns Structured video metadata
*/
buildMetadata(probeData, fileSize) {
const videoStream = probeData.streams.find((s) => s.codec_type === "video");
const audioStream = probeData.streams.find((s) => s.codec_type === "audio");
const subtitleStreams = probeData.streams.filter((s) => s.codec_type === "subtitle");
const duration = probeData.format?.duration
? parseFloat(String(probeData.format.duration))
: 0;
// Parse FPS from r_frame_rate (e.g., "30000/1001" or "25/1")
let fps = 0;
if (videoStream?.r_frame_rate) {
const parts = String(videoStream.r_frame_rate).split("/");
if (parts.length === 2) {
const num = parseFloat(parts[0]);
const den = parseFloat(parts[1]);
if (den > 0) {
fps = Math.round((num / den) * 100) / 100;
}
}
else {
fps = parseFloat(parts[0]) || 0;
}
}
return {
duration,
durationFormatted: this.formatDuration(duration),
width: videoStream?.width ?? 0,
height: videoStream?.height ?? 0,
codec: videoStream?.codec_name ?? "unknown",
fps,
bitrate: probeData.format?.bit_rate
? parseInt(String(probeData.format.bit_rate), 10)
: 0,
audioCodec: audioStream?.codec_name,
audioChannels: audioStream?.channels,
audioSampleRate: audioStream?.sample_rate
? parseInt(String(audioStream.sample_rate), 10)
: undefined,
subtitleTracks: subtitleStreams.length,
fileSize,
};
}
// ===========================================================================
// KEYFRAME EXTRACTION
// ===========================================================================
/**
* Extract keyframes from a video at calculated intervals.
*
* The interval between frames is determined by the video duration
* (see VIDEO_CONFIG.FRAME_INTERVALS):
* - <= 10s: every 1s (very short clips — dense coverage)
* - <= 30s: every 2s (short bug clips)
* - <= 120s: every 3s (standard screen recordings)
* - <= 600s: every 6s (longer demos)
* - <= 1800s: every 20s (meeting recordings)
* - > 1800s: every 60s (full meetings), widened further by the MAX_FRAMES cap
*
* Results are capped at MAX_FRAMES (100) and each frame is resized
* to fit within 768x768px while maintaining aspect ratio.
* The interval is adaptive: if the tier interval would exceed MAX_FRAMES,
* the interval widens to duration/MAX_FRAMES for full-video coverage.
*
* @param videoPath - Path to the video file
* @param tempDir - Temp directory for frame output
* @param durationSec - Video duration in seconds
* @returns Array of JPEG frame buffers
*/
async extractKeyframes(videoPath, tempDir, durationSec) {
if (durationSec <= 0) {
return [];
}
// Determine extraction interval based on duration
const intervalSec = this.getFrameInterval(durationSec);
// Calculate timestamps to extract
const timestamps = [];
for (let t = 0; t < durationSec && timestamps.length < VIDEO_CONFIG.MAX_FRAMES; t += intervalSec) {
timestamps.push(t);
}
if (timestamps.length === 0) {
// For very short videos, grab at least one frame at t=0
timestamps.push(0);
}
// Extract frames using ffmpeg
const framesDir = join(tempDir, "frames");
await fs.mkdir(framesDir, { recursive: true });
await this.runFfmpegFrameExtraction(videoPath, framesDir, timestamps, intervalSec);
// Read extracted frames and resize with sharp
const keyframes = [];
for (let i = 0; i < timestamps.length; i++) {
const framePath = join(framesDir, `frame_${String(i + 1).padStart(4, "0")}.jpg`);
try {
await fs.access(framePath);
const rawFrame = await fs.readFile(framePath);
// Resize to fit within max dimension while preserving aspect ratio
const sharp = (await import("sharp")).default;
const resized = await sharp(rawFrame)
.resize(VIDEO_CONFIG.FRAME_MAX_DIMENSION, VIDEO_CONFIG.FRAME_MAX_DIMENSION, {
fit: "inside",
withoutEnlargement: true,
})
.jpeg({ quality: VIDEO_CONFIG.FRAME_JPEG_QUALITY })
.toBuffer();
keyframes.push(resized);
}
catch {
// Skip individual frame on resize/encode failure
}
}
return keyframes;
}
/**
* Run ffmpeg to extract frames at specified timestamps.
*
* Uses the `-vf select` filter to keep one frame per `intervalSec` in a single
* ffmpeg pass, which is more efficient than seeking for each frame individually.
*
* @param videoPath - Path to the video file
* @param outputDir - Directory to write frame files
* @param timestamps - Array of timestamps in seconds
*/
async runFfmpegFrameExtraction(videoPath, outputDir, timestamps, intervalSec) {
const ff = await loadFluentFfmpeg();
return new Promise((resolve, reject) => {
// Improved select expression to pick exactly one frame per interval
// instead of multiple frames within a 0.5s window.
const selectExpr = `isnan(prev_selected_t)+gte(t-prev_selected_t,${intervalSec}-0.001)`;
const timeoutId = setTimeout(() => {
reject(new Error(`ffmpeg frame extraction timed out after ${VIDEO_CONFIG.FFMPEG_TIMEOUT_MS}ms`));
}, VIDEO_CONFIG.FFMPEG_TIMEOUT_MS);
ff(videoPath)
.outputOptions([
"-vf",
`select='${selectExpr}',scale='min(${VIDEO_CONFIG.FRAME_MAX_DIMENSION}\\,iw):-2'`,
"-vsync",
"vfr",
"-q:v",
"3",
"-frames:v",
String(timestamps.length),
])
.output(join(outputDir, "frame_%04d.jpg"))
.on("end", () => {
clearTimeout(timeoutId);
resolve();
})
.on("error", (err) => {
clearTimeout(timeoutId);
reject(err);
})
.run();
});
}
/**
* Determine the frame extraction interval based on video duration.
*
* @param durationSec - Video duration in seconds
* @returns Interval in seconds between extracted frames
*/
getFrameInterval(durationSec) {
let intervalSec = 180; // fallback
for (const tier of VIDEO_CONFIG.FRAME_INTERVALS) {
if (durationSec <= tier.maxDuration) {
intervalSec = tier.intervalSec;
break;
}
}
// Adaptive: if the tier interval would produce more frames than MAX_FRAMES,
// widen the interval so frames are evenly distributed across the full video
const estimatedFrames = Math.floor(durationSec / intervalSec);
if (estimatedFrames > VIDEO_CONFIG.MAX_FRAMES) {
intervalSec = Math.ceil(durationSec / VIDEO_CONFIG.MAX_FRAMES);
}
return intervalSec;
}
// ===========================================================================
// SUBTITLE EXTRACTION
// ===========================================================================
/**
* Extract embedded subtitle text from the first subtitle track.
*
* Uses ffmpeg to convert the first subtitle stream to SRT format,
* then strips SRT formatting (timestamps, sequence numbers) to produce
* plain text.
*
* @param videoPath - Path to the video file
* @param tempDir - Temp directory for subtitle output
* @returns Extracted subtitle text, or undefined if extraction fails
*/
async extractSubtitles(videoPath, tempDir) {
const subtitlePath = join(tempDir, "subtitles.srt");
const ffSub = await loadFluentFfmpeg();
await new Promise((resolve, reject) => {
const timeoutId = setTimeout(() => {
reject(new Error(`ffmpeg subtitle extraction timed out after ${VIDEO_CONFIG.FFMPEG_TIMEOUT_MS}ms`));
}, VIDEO_CONFIG.FFMPEG_TIMEOUT_MS);
ffSub(videoPath)
.outputOptions(["-map", "0:s:0", "-c:s", "srt"])
.output(subtitlePath)
.on("end", () => {
clearTimeout(timeoutId);
resolve();
})
.on("error", (err) => {
clearTimeout(timeoutId);
reject(err);
})
.run();
});
try {
const srtContent = await fs.readFile(subtitlePath, "utf-8");
return this.parseSrtToPlainText(srtContent);
}
catch {
return undefined;
}
}
/**
* Parse SRT subtitle content into plain text.
* Strips sequence numbers, timestamps, and blank lines.
*
* @param srt - Raw SRT content
* @returns Plain text from subtitles
*/
parseSrtToPlainText(srt) {
if (!srt.trim()) {
return "";
}
return srt
.split("\n")
.filter((line) => {
const trimmed = line.trim();
// Skip empty lines
if (!trimmed) {
return false;
}
// Skip sequence numbers (pure digits)
if (/^\d+$/.test(trimmed)) {
return false;
}
// Skip timestamp lines (e.g., "00:01:23,456 --> 00:01:25,789")
if (/^\d{2}:\d{2}:\d{2}[,.]\d{3}\s*-->/.test(trimmed)) {
return false;
}
return true;
})
.map((line) => line.trim())
.join("\n")
.trim();
}
// ===========================================================================
// TEXT CONTENT BUILDER
// ===========================================================================
/**
* Build a structured text description of the video for LLM consumption.
*
* The output includes:
* - File name and basic info
* - Technical metadata (resolution, codec, duration, etc.)
* - Frame extraction summary
* - Subtitle text (if available)
*
* @param metadata - Extracted video metadata
* @param frameCount - Number of keyframes extracted
* @param subtitleText - Extracted subtitle text (if any)
* @param filename - Original filename
* @returns Formatted text content for the LLM
*/
buildTextContent(metadata, frameCount, subtitleText, filename) {
const lines = [];
lines.push(`[Video File: ${filename}]`);
lines.push("");
lines.push("## Video Metadata");
lines.push(`- Duration: ${metadata.durationFormatted}`);
lines.push(`- Resolution: ${metadata.width}x${metadata.height}`);
lines.push(`- Video Codec: ${metadata.codec}`);
if (metadata.fps > 0) {
lines.push(`- Frame Rate: ${metadata.fps} fps`);
}
if (metadata.bitrate > 0) {
lines.push(`- Bitrate: ${(metadata.bitrate / 1000).toFixed(0)} kbps`);
}
if (metadata.audioCodec) {
lines.push(`- Audio Codec: ${metadata.audioCodec}`);
if (metadata.audioChannels) {
lines.push(`- Audio Channels: ${metadata.audioChannels === 1 ? "mono" : metadata.audioChannels === 2 ? "stereo" : `${metadata.audioChannels}ch`}`);
}
if (metadata.audioSampleRate) {
lines.push(`- Audio Sample Rate: ${(metadata.audioSampleRate / 1000).toFixed(1)} kHz`);
}
}
lines.push(`- File Size: ${(metadata.fileSize / (1024 * 1024)).toFixed(1)} MB`);
lines.push("");
if (frameCount > 0) {
const intervalSec = this.getFrameInterval(metadata.duration);
lines.push(`## Keyframes (${frameCount} frames extracted every ~${intervalSec}s)`);
lines.push("The following images are keyframes extracted from the video at regular intervals.");
}
else {
lines.push("## Keyframes");
lines.push("No keyframes could be extracted from this video (it may be audio-only or use an unsupported codec).");
}
if (subtitleText) {
lines.push("");
lines.push("## Subtitles / Captions");
// Truncate very long subtitle text to avoid blowing up context
const maxSubtitleChars = 10_000;
if (subtitleText.length > maxSubtitleChars) {
lines.push(subtitleText.substring(0, maxSubtitleChars) +
`\n... [truncated, ${subtitleText.length - maxSubtitleChars} more characters]`);
}
else {
lines.push(subtitleText);
}
}
return lines.join("\n");
}
// ===========================================================================
// UTILITY METHODS
// ===========================================================================
/**
* Format a duration in seconds to a human-readable string.
*
* @param seconds - Duration in seconds
* @returns Formatted string (e.g., "1h 23m 45s")
*/
formatDuration(seconds) {
if (seconds <= 0) {
return "0s";
}
const hours = Math.floor(seconds / 3600);
const minutes = Math.floor((seconds % 3600) / 60);
const secs = Math.floor(seconds % 60);
const parts = [];
if (hours > 0) {
parts.push(`${hours}h`);
}
if (minutes > 0) {
parts.push(`${minutes}m`);
}
if (secs > 0 || parts.length === 0) {
parts.push(`${secs}s`);
}
return parts.join(" ");
}
/**
* Get a file extension from FileInfo, falling back to ".mp4".
*
* @param fileInfo - File information
* @returns File extension with leading dot
*/
getExtensionFromFileInfo(fileInfo) {
const name = fileInfo.name || "";
const dotIndex = name.lastIndexOf(".");
if (dotIndex >= 0) {
return name.substring(dotIndex).toLowerCase();
}
// Fallback: derive from MIME type
const mimeExtMap = {
"video/mp4": ".mp4",
"video/x-matroska": ".mkv",
"video/quicktime": ".mov",
"video/webm": ".webm",
"video/x-msvideo": ".avi",
"video/x-ms-wmv": ".wmv",
"video/x-flv": ".flv",
"video/3gpp": ".3gp",
"video/3gpp2": ".3g2",
"video/MP2T": ".ts",
"video/ogg": ".ogv",
};
return mimeExtMap[fileInfo.mimetype] || ".mp4";
}
/**
* Write an in-memory buffer to a file on disk.
*
* @param buffer - Buffer to write
* @param filePath - Destination file path
*/
async writeBufferToFile(buffer, filePath) {
const readable = Readable.from(buffer);
const writable = createWriteStream(filePath);
await pipeline(readable, writable);
}
// ===========================================================================
// TARGETED EXTRACTION API
// ===========================================================================
/**
* Extract frames from a specific time range in a video.
*
* This is the on-demand extraction method called by the `extract_file_content`
* tool. Unlike initial keyframe extraction (which covers the full video),
* this targets a specific time window with configurable frame count.
*
* @param buffer - Video file buffer
* @param filename - Original filename (for extension detection)
* @param startSec - Start time in seconds
* @param endSec - End time in seconds
* @param frameCount - Number of frames to extract in the range (default: 5)
* @returns Array of JPEG frame buffers
*/
async extractFrameRange(buffer, filename, startSec, endSec, frameCount = 5) {
await initFfmpegPaths();
const tempDir = join(tmpdir(), `neurolink-video-extract-${randomUUID()}`);
try {
await fs.mkdir(tempDir, { recursive: true });
// Write buffer to temp file
const ext = this.guessExtensionFromName(filename);
const tempVideoPath = join(tempDir, `input${ext}`);
await this.writeBufferToFile(buffer, tempVideoPath);
// Calculate evenly-spaced timestamps within the range
const duration = endSec - startSec;
if (duration <= 0) {
return [];
}
const clampedCount = Math.min(frameCount, VIDEO_CONFIG.MAX_FRAMES);
const timestamps = [];
let interval = duration;
if (clampedCount === 1) {
timestamps.push(startSec);
}
else {
interval = duration / (clampedCount - 1);
for (let i = 0; i < clampedCount; i++) {
timestamps.push(startSec + interval * i);
}
}
// Extract frames
const framesDir = join(tempDir, "frames");
await fs.mkdir(framesDir, { recursive: true });
await this.runFfmpegFrameExtraction(tempVideoPath, framesDir, timestamps, interval);
// Read and resize frames
const keyframes = [];
for (let i = 0; i < timestamps.length; i++) {
const framePath = join(framesDir, `frame_${String(i + 1).padStart(4, "0")}.jpg`);
try {
await fs.access(framePath);
const rawFrame = await fs.readFile(framePath);
const sharp = (await import("sharp")).default;
const resized = await sharp(rawFrame)
.resize(VIDEO_CONFIG.FRAME_MAX_DIMENSION, VIDEO_CONFIG.FRAME_MAX_DIMENSION, {
fit: "inside",
withoutEnlargement: true,
})
.jpeg({ quality: VIDEO_CONFIG.FRAME_JPEG_QUALITY })
.toBuffer();
keyframes.push(resized);
}
catch {
// Skip individual frame failures
}
}
return keyframes;
}
finally {
await fs.rm(tempDir, { recursive: true, force: true }).catch(() => {
/* cleanup - ignore temp dir removal errors */
});
}
}
/**
* Guess file extension from filename, with fallback to .mp4.
*/
guessExtensionFromName(filename) {
const dotIndex = filename.lastIndexOf(".");
if (dotIndex >= 0) {
return filename.substring(dotIndex).toLowerCase();
}
return ".mp4";
}
}
// =============================================================================
// SINGLETON INSTANCE
// =============================================================================
/**
 * Singleton video processor instance.
 *
 * Created once, with no constructor arguments, when this module is
 * evaluated. The `isVideoFile` and `processVideo` helpers exported from
 * this module delegate to this instance. Use it for standard video
 * processing operations.
 *
 * @example
 * ```typescript
 * import { videoProcessor } from "./VideoProcessor.js";
 *
 * const result = await videoProcessor.processFile(fileInfo);
 * ```
 */
export const videoProcessor = new VideoProcessor();
// =============================================================================
// HELPER FUNCTIONS
// =============================================================================
/**
 * Report whether a file should be treated as a video.
 *
 * Thin wrapper that forwards both arguments to the singleton processor's
 * `isFileSupported` and returns its answer unchanged.
 *
 * @param mimetype - MIME type of the file
 * @param filename - Filename (for extension-based detection)
 * @returns true if the file is a supported video file
 *
 * @example
 * ```typescript
 * if (isVideoFile("video/mp4", "recording.mp4")) {
 *   const result = await processVideo(fileInfo);
 * }
 *
 * if (isVideoFile("", "clip.mkv")) {
 *   // Also matches by extension
 * }
 * ```
 */
export function isVideoFile(mimetype, filename) {
    const supported = videoProcessor.isFileSupported(mimetype, filename);
    return supported;
}
/**
 * Process one video file through the singleton processor.
 *
 * Convenience wrapper: both arguments are handed to
 * `videoProcessor.processFile` and its result is returned as-is.
 *
 * @param fileInfo - File information (can include URL or buffer)
 * @param options - Optional processing options (auth headers, timeout, retry config)
 * @returns Processing result with extracted video data or error
 *
 * @example
 * ```typescript
 * import { processVideo } from "./VideoProcessor.js";
 *
 * const result = await processVideo({
 *   id: "vid-123",
 *   name: "demo.mp4",
 *   mimetype: "video/mp4",
 *   size: 15_000_000,
 *   buffer: videoBuffer,
 * });
 *
 * if (result.success) {
 *   console.log(`Duration: ${result.data.metadata.durationFormatted}`);
 *   console.log(`Extracted ${result.data.frameCount} keyframes`);
 *   console.log(`Text content:\n${result.data.textContent}`);
 *
 *   if (result.data.subtitleText) {
 *     console.log(`Subtitles:\n${result.data.subtitleText}`);
 *   }
 * } else {
 *   console.error(`Processing failed: ${result.error?.userMessage}`);
 * }
 * ```
 */
export async function processVideo(fileInfo, options) {
    const outcome = videoProcessor.processFile(fileInfo, options);
    return outcome;
}