@devansh-m12/faceify
Version:
Smart video converter that transforms landscape videos to mobile-friendly vertical format with face detection
923 lines (922 loc) • 46 kB
JavaScript
;
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
var desc = Object.getOwnPropertyDescriptor(m, k);
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
o["default"] = v;
});
var __importStar = (this && this.__importStar) || (function () {
var ownKeys = function(o) {
ownKeys = Object.getOwnPropertyNames || function (o) {
var ar = [];
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
return ar;
};
return ownKeys(o);
};
return function (mod) {
if (mod && mod.__esModule) return mod;
var result = {};
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
__setModuleDefault(result, mod);
return result;
};
})();
Object.defineProperty(exports, "__esModule", { value: true });
exports.MobileVideoConverter = void 0;
// Use require for fluent-ffmpeg since its types are CommonJS style
const fs = __importStar(require("fs"));
const path = __importStar(require("path"));
// Import canvas with type assertions to work with face-api.js
const canvas = __importStar(require("canvas"));
// Import face-api.js using require to avoid TypeScript errors
// @ts-ignore
const faceapi = require('face-api.js');
// Import ffmpeg using require to match its CommonJS style module
const ffmpeg = require('fluent-ffmpeg');
// Initialize face-api.js
let modelsLoaded = false;
async function loadFaceDetectionModels() {
if (modelsLoaded)
return;
// Register the canvas implementation with face-api.js
const { Canvas, Image, ImageData } = canvas;
// @ts-ignore - Ignore type errors when monkey patching
faceapi.env.monkeyPatch({ Canvas, Image, ImageData });
// Check if models directory exists and create it if needed
const modelsDir = path.join(__dirname, '../models');
if (!fs.existsSync(modelsDir)) {
fs.mkdirSync(modelsDir, { recursive: true });
}
// Download models if they don't exist
const modelPath = path.join(modelsDir, 'tiny_face_detector_model-weights_manifest.json');
if (!fs.existsSync(modelPath)) {
console.log('Downloading face detection models...');
// Copy models from node_modules to models directory
await downloadTinyFaceDetectorModel(modelsDir);
}
try {
// Load tiny face detector model
await faceapi.nets.tinyFaceDetector.loadFromDisk(modelsDir);
modelsLoaded = true;
console.log('Face detection models loaded successfully');
}
catch (error) {
console.error('Error loading face detection models:', error);
throw error;
}
}
// Function to download the tiny face detector model
async function downloadTinyFaceDetectorModel(targetDir) {
return new Promise((resolve, reject) => {
const modelUrl = 'https://raw.githubusercontent.com/justadudewhohacks/face-api.js/master/weights/tiny_face_detector_model-weights_manifest.json';
// Using node's HTTP(S) request to download the model manifest
const https = require('https');
const http = require('http');
// Download using the appropriate protocol
const client = modelUrl.startsWith('https') ? https : http;
client.get(modelUrl, (res) => {
let data = '';
res.on('data', (chunk) => { data += chunk; });
res.on('end', () => {
try {
// Parse the manifest
const manifest = JSON.parse(data);
const weightsUrls = manifest.map((entry) => `https://raw.githubusercontent.com/justadudewhohacks/face-api.js/master/weights/${entry.paths[0]}`);
// Save the manifest
fs.writeFileSync(path.join(targetDir, 'tiny_face_detector_model-weights_manifest.json'), data);
// Download each weight file
let downloaded = 0;
weightsUrls.forEach((url) => {
const fileName = url.split('/').pop();
const file = fs.createWriteStream(path.join(targetDir, fileName));
client.get(url, (res) => {
res.pipe(file);
file.on('finish', () => {
file.close();
downloaded++;
if (downloaded === weightsUrls.length) {
console.log('Models downloaded successfully');
resolve();
}
});
}).on('error', (err) => {
fs.unlink(path.join(targetDir, fileName), () => { });
reject(err);
});
});
}
catch (error) {
reject(error);
}
});
}).on('error', (err) => {
reject(err);
});
});
}
class MobileVideoConverter {
constructor(options = {}) {
this.options = {
outputDirectory: options.outputDirectory || path.resolve('./converted-videos'),
targetWidth: options.targetWidth || 1080,
targetHeight: options.targetHeight || 1920,
detectFaces: options.detectFaces ?? true
};
// Ensure output directory exists
if (!fs.existsSync(this.options.outputDirectory)) {
fs.mkdirSync(this.options.outputDirectory, { recursive: true });
}
// Also create segments directory in advance
const segmentsDir = path.join(this.options.outputDirectory, 'segments');
if (!fs.existsSync(segmentsDir)) {
fs.mkdirSync(segmentsDir, { recursive: true });
}
}
async convertVideo(inputPath) {
// Validate input file exists
if (!fs.existsSync(inputPath)) {
throw new Error(`Input file not found: ${inputPath}`);
}
// Get video information first to determine correct crop dimensions
const videoInfo = await this.getVideoInfo(inputPath);
console.log('Video dimensions:', videoInfo);
// Detect faces if option is enabled
let faceTimeline;
if (this.options.detectFaces) {
try {
// Detect faces at multiple timestamps throughout the video
faceTimeline = await this.detectFacesAcrossTimeline(inputPath, videoInfo.duration);
console.log('Face timeline detected:', faceTimeline);
}
catch (error) {
console.error("Face detection failed, continuing without face detection:", error);
// Continue without face detection
}
}
// Generate output filename
const outputFilename = `mobile_${path.basename(inputPath)}`;
const outputPath = path.join(this.options.outputDirectory, outputFilename);
// Process video with dynamic cropping based on detected faces
await this.processVideoWithDynamicCropping(inputPath, outputPath, faceTimeline, videoInfo);
// Return the faces from the first timestamp for backward compatibility
const firstFaces = faceTimeline && faceTimeline.length > 0 ? faceTimeline[0].faces : undefined;
return {
originalPath: inputPath,
convertedPath: outputPath,
faces: firstFaces
};
}
// Get video information using ffmpeg
getVideoInfo(videoPath) {
return new Promise((resolve, reject) => {
ffmpeg.ffprobe(videoPath, (err, metadata) => {
if (err) {
reject(err);
return;
}
// Find video stream
const videoStream = metadata.streams.find((s) => s.codec_type === 'video');
if (!videoStream) {
reject(new Error('No video stream found'));
return;
}
// Get duration from format section if not in video stream
const duration = videoStream.duration
? parseFloat(videoStream.duration)
: (metadata.format.duration ? parseFloat(metadata.format.duration) : 0);
resolve({
width: videoStream.width,
height: videoStream.height,
duration: duration
});
});
});
}
// Detect faces across multiple timestamps in the video
async detectFacesAcrossTimeline(videoPath, duration) {
// Create a timeline of face detections
const faceTimeline = [];
try {
// Create temp directory for frames
const frameOutputDir = path.join(this.options.outputDirectory, 'frames');
if (!fs.existsSync(frameOutputDir)) {
fs.mkdirSync(frameOutputDir, { recursive: true });
}
// Load face detection models
await loadFaceDetectionModels();
console.log('Detecting scene changes and key frames...');
// Step 1: Detect scene changes using FFmpeg's scene detection
const sceneChangeTimestamps = await this.detectSceneChanges(videoPath, duration);
// Step 2: Add start and end frame timestamps
const keyTimestamps = new Set([
0.5, // Start with a frame slightly after the beginning
...sceneChangeTimestamps,
Math.max(0.5, duration - 2) // End with a frame slightly before the end
]);
// Step 3: If we have too few timestamps, add some evenly distributed ones
const minSamples = 5;
if (keyTimestamps.size < minSamples) {
const additionalSamples = minSamples - keyTimestamps.size;
for (let i = 1; i <= additionalSamples; i++) {
keyTimestamps.add(duration * i / (additionalSamples + 1));
}
}
// Step 4: Ensure we don't have too many (for performance reasons)
const maxSamples = 25;
let timestamps = Array.from(keyTimestamps).sort((a, b) => a - b);
if (timestamps.length > maxSamples) {
// If we have too many, sample evenly
const step = timestamps.length / maxSamples;
const sampledTimestamps = [];
for (let i = 0; i < timestamps.length; i += step) {
sampledTimestamps.push(timestamps[Math.floor(i)]);
}
// Always include the first and last timestamps
if (!sampledTimestamps.includes(timestamps[0])) {
sampledTimestamps.unshift(timestamps[0]);
}
if (!sampledTimestamps.includes(timestamps[timestamps.length - 1])) {
sampledTimestamps.push(timestamps[timestamps.length - 1]);
}
timestamps = sampledTimestamps.sort((a, b) => a - b);
}
console.log(`Analyzing ${timestamps.length} key frames across ${duration} seconds of video...`);
// Sample frames at the detected timestamps
for (let i = 0; i < timestamps.length; i++) {
const timestamp = timestamps[i];
// Extract frame at this timestamp
const framePath = path.join(frameOutputDir, `frame_${Date.now()}_${i}.jpg`);
// Format timestamp for ffmpeg
const timestampStr = this.formatTimestamp(timestamp);
// Extract the frame
await new Promise((resolve, reject) => {
ffmpeg(videoPath)
.outputOptions(['-vframes 1', `-ss ${timestampStr}`])
.output(framePath)
.on('end', () => resolve())
.on('error', (err) => reject(err))
.run();
});
// Detect faces in this frame
const faces = await this.detectFacesInFrame(framePath);
// Add to timeline
faceTimeline.push({
timestamp,
faces
});
// Clean up the temporary frame file
fs.unlinkSync(framePath);
}
// Post-process the timeline to handle segments with no faces detected
const videoInfo = {
width: 0,
height: 0,
duration: duration
};
this.postProcessFaceTimeline(faceTimeline, videoInfo);
return faceTimeline;
}
catch (error) {
console.error('Face timeline detection error:', error);
return []; // Return empty array if detection fails
}
}
// Detect scene changes using FFmpeg's scene detection
async detectSceneChanges(videoPath, duration) {
return new Promise((resolve, reject) => {
// Use a more reliable approach with scene filter and extracting frame data
console.log('Analyzing video content for scene changes...');
// Create temporary directory for scene detection frames
const tempDir = path.join(this.options.outputDirectory, 'scene_detection');
if (!fs.existsSync(tempDir)) {
fs.mkdirSync(tempDir, { recursive: true });
}
// Extract scene detection data using FFmpeg with a more sensitive threshold
const sceneDetectionProcess = ffmpeg(videoPath)
.outputOptions([
'-vf', `select='gt(scene,0.20)',showinfo`, // Lower threshold to catch more scene changes
'-vsync', '0',
'-f', 'null'
])
.output('/dev/null');
let ffmpegOutput = '';
sceneDetectionProcess
.on('stderr', (stderr) => {
ffmpegOutput += stderr;
})
.on('end', () => {
try {
// Parse the output to extract timestamps from the showinfo filter
const timestampRegex = /pts_time:([\d.]+)/g;
const rawTimestamps = [];
let match;
while ((match = timestampRegex.exec(ffmpegOutput)) !== null) {
rawTimestamps.push(parseFloat(match[1]));
}
console.log(`Found ${rawTimestamps.length} raw scene change candidates`);
// Filter out timestamps that are too close to each other
let lastTimestamp = 0;
const filteredTimestamps = rawTimestamps
.sort((a, b) => a - b)
.filter(ts => {
// Ensure at least 3 seconds between scene changes
const shouldKeep = ts - lastTimestamp >= 3;
if (shouldKeep)
lastTimestamp = ts;
return shouldKeep;
});
console.log(`Filtered to ${filteredTimestamps.length} scene changes after removing closely spaced frames`);
// If we found enough scene changes, use them
if (filteredTimestamps.length >= 3) {
// Add beginning and end points if needed
const finalTimestamps = [...filteredTimestamps];
if (finalTimestamps[0] > 5) {
finalTimestamps.unshift(0.5);
console.log('Added timestamp at beginning of video (0.5s)');
}
if (finalTimestamps[finalTimestamps.length - 1] < duration - 5) {
finalTimestamps.push(duration - 2);
console.log(`Added timestamp at end of video (${duration - 2}s)`);
}
// For long sections without scene changes, add intermediate points
const maxGap = 30; // Maximum gap in seconds between timestamps
const intermediatePoints = [];
for (let i = 0; i < finalTimestamps.length - 1; i++) {
const current = finalTimestamps[i];
const next = finalTimestamps[i + 1];
const gap = next - current;
if (gap > maxGap) {
const numIntermediatePoints = Math.floor(gap / maxGap);
const stepSize = gap / (numIntermediatePoints + 1);
for (let j = 1; j <= numIntermediatePoints; j++) {
const intermediateTime = current + (j * stepSize);
intermediatePoints.push(intermediateTime);
console.log(`Added intermediate timestamp at ${intermediateTime.toFixed(2)}s (gap was ${gap.toFixed(2)}s)`);
}
}
}
const allTimestamps = [...finalTimestamps, ...intermediatePoints].sort((a, b) => a - b);
console.log(`Final timeline has ${allTimestamps.length} key points for analysis`);
resolve(allTimestamps);
}
else {
// Fall back to time-based sampling but include any detected scenes
console.log('Not enough scene changes detected, combining with time-based sampling');
const timeBasedSamples = this.fallbackToTimeSampling(duration);
const combinedSamples = [...new Set([...filteredTimestamps, ...timeBasedSamples])];
combinedSamples.sort((a, b) => a - b);
console.log(`Combined with time-based sampling, using ${combinedSamples.length} samples`);
resolve(combinedSamples);
}
}
catch (error) {
console.error('Error processing scene detection data:', error);
resolve(this.fallbackToTimeSampling(duration));
}
})
.on('error', (err) => {
console.error('FFmpeg scene detection failed:', err.message);
resolve(this.fallbackToTimeSampling(duration));
})
.run();
});
}
// Helper method for time-based sampling fallback
fallbackToTimeSampling(duration) {
console.log('Using time-based sampling fallback');
const samples = [];
const everyNSeconds = 15; // One sample every 15 seconds
// Start with the beginning of the video
samples.push(0.5);
// Add regular interval samples
for (let time = everyNSeconds; time < duration - 5; time += everyNSeconds) {
samples.push(time);
}
// Add the end of the video
if (samples[samples.length - 1] < duration - 5) {
samples.push(duration - 2);
}
return samples;
}
// Post-process the face timeline to fill in gaps and smooth transitions
postProcessFaceTimeline(faceTimeline, videoInfo) {
if (!videoInfo)
return;
// If we have no timeline points or all points have faces, we're done
if (faceTimeline.length === 0 || faceTimeline.every(point => point.faces.length > 0)) {
return;
}
console.log('Post-processing face timeline to fill gaps and smooth transitions...');
// Find the most reliable face detection points (those with faces)
const reliablePoints = faceTimeline.filter(point => point.faces.length > 0);
// If we don't have any reliable points, we can't do much
if (reliablePoints.length === 0) {
return;
}
// For each point with no faces, interpolate or extrapolate from nearby reliable points
for (const point of faceTimeline) {
if (point.faces.length > 0)
continue; // Skip points that already have faces
// Find the closest reliable points before and after this timestamp
const before = [...reliablePoints]
.filter(p => p.timestamp < point.timestamp)
.sort((a, b) => b.timestamp - a.timestamp)[0]; // Closest earlier point
const after = [...reliablePoints]
.filter(p => p.timestamp > point.timestamp)
.sort((a, b) => a.timestamp - b.timestamp)[0]; // Closest later point
// If we have points before and after, interpolate
if (before && after) {
const totalInterval = after.timestamp - before.timestamp;
const pointPosition = (point.timestamp - before.timestamp) / totalInterval;
// Get the first face from each reference point (assuming it's the most important)
const faceBefore = before.faces[0];
const faceAfter = after.faces[0];
// Create interpolated face
const interpolatedFace = {
x: Math.round(faceBefore.x + (faceAfter.x - faceBefore.x) * pointPosition),
y: Math.round(faceBefore.y + (faceAfter.y - faceBefore.y) * pointPosition),
width: Math.round(faceBefore.width + (faceAfter.width - faceBefore.width) * pointPosition),
height: Math.round(faceBefore.height + (faceAfter.height - faceBefore.height) * pointPosition)
};
// Add interpolated face
point.faces = [interpolatedFace];
console.log(`Interpolated face at ${point.timestamp}s: ${JSON.stringify(interpolatedFace)}`);
}
// If we only have points before, use the latest one
else if (before) {
point.faces = [...before.faces];
console.log(`Using faces from ${before.timestamp}s for timestamp ${point.timestamp}s`);
}
// If we only have points after, use the earliest one
else if (after) {
point.faces = [...after.faces];
console.log(`Using faces from ${after.timestamp}s for timestamp ${point.timestamp}s`);
}
// If we have no reliable points at all (shouldn't happen due to earlier check)
else {
// Use default center crop
// This is handled by the cropping logic when faces array is empty
}
}
}
// Helper to format timestamps for ffmpeg
formatTimestamp(seconds) {
const hours = Math.floor(seconds / 3600);
const minutes = Math.floor((seconds % 3600) / 60);
const secs = Math.floor(seconds % 60);
const ms = Math.floor((seconds % 1) * 1000);
return `${hours.toString().padStart(2, '0')}:${minutes.toString().padStart(2, '0')}:${secs.toString().padStart(2, '0')}.${ms.toString().padStart(3, '0')}`;
}
// Detect faces in a single frame
async detectFacesInFrame(framePath) {
try {
// Load the image
const img = await canvas.loadImage(framePath);
// Create canvas with the same dimensions as the image
const cvs = canvas.createCanvas(img.width, img.height);
const ctx = cvs.getContext('2d');
ctx.drawImage(img, 0, 0);
// Try multiple face detection approaches with different parameters
// 1. First try with stricter parameters for high confidence
// @ts-ignore - Ignore type errors with TinyFaceDetectorOptions
const strictOptions = new faceapi.TinyFaceDetectorOptions({
inputSize: 416, // Larger input size for better detection
scoreThreshold: 0.6 // Higher score threshold (more confident)
});
// @ts-ignore - Ignore type errors with detectAllFaces
let detections = await faceapi.detectAllFaces(cvs, strictOptions);
// 2. If no faces found, try with more lenient parameters
if (detections.length === 0) {
// @ts-ignore
const lenientOptions = new faceapi.TinyFaceDetectorOptions({
inputSize: 416,
scoreThreshold: 0.3 // Lower score threshold to detect less confident faces
});
// @ts-ignore
detections = await faceapi.detectAllFaces(cvs, lenientOptions);
if (detections.length > 0) {
console.log(`Found ${detections.length} faces using lenient parameters`);
}
}
// 3. Apply additional heuristics for better crops
// If we found multiple faces, filter out small or peripheral ones
if (detections.length > 1) {
// Sort by size (largest first)
// @ts-ignore
detections.sort((a, b) => (b.box.width * b.box.height) - (a.box.width * a.box.height));
// Keep only the largest/most central faces
// Filter out faces that are too small compared to the largest one
const largestFaceArea = detections[0].box.width * detections[0].box.height;
detections = detections.filter((detection, index) => {
if (index === 0)
return true; // Always keep the largest face
const area = detection.box.width * detection.box.height;
// Keep if at least 40% as large as the largest face
return area >= largestFaceArea * 0.4;
});
}
// Convert detections to the expected format
return detections.map((detection) => {
const box = detection.box;
return {
x: Math.round(box.x),
y: Math.round(box.y),
width: Math.round(box.width),
height: Math.round(box.height)
};
});
}
catch (error) {
console.error('Face detection error for frame:', error);
return []; // Return empty array if detection fails
}
}
// Process video with dynamic cropping based on face timeline
async processVideoWithDynamicCropping(inputPath, outputPath, faceTimeline, videoInfo) {
if (!videoInfo) {
videoInfo = await this.getVideoInfo(inputPath);
}
// If no face timeline or no faces detected, use the old method
if (!faceTimeline || faceTimeline.length === 0) {
return this.processVideoForMobile(inputPath, outputPath, undefined, videoInfo);
}
try {
// Determine crop parameters for each timestamp
const cropTimelinePoints = faceTimeline.map(timePoint => {
const cropOptions = this.calculateVerticalCrop(inputPath, timePoint.faces, videoInfo);
return {
timestamp: timePoint.timestamp,
cropOptions
};
});
// If there's only one timestamp or all crop options are the same, just use a single crop
if (cropTimelinePoints.length === 1 || this.allCropPointsEqual(cropTimelinePoints)) {
console.log('Using static crop for the entire video');
return this.processVideoForMobile(inputPath, outputPath, faceTimeline[0].faces, videoInfo);
}
// For simplicity in debugging, let's log what we detected
cropTimelinePoints.forEach((point, i) => {
console.log(`Timestamp ${point.timestamp}s: Crop x=${point.cropOptions.x}, y=${point.cropOptions.y}`);
});
// Smooth the crop timeline to avoid sudden jumps
const smoothedCropTimeline = this.smoothCropTimeline(cropTimelinePoints);
// After smoothing, log the adjusted crop points
console.log('After smoothing:');
smoothedCropTimeline.forEach((point, i) => {
console.log(`Timestamp ${point.timestamp}s: Crop x=${point.cropOptions.x}, y=${point.cropOptions.y}`);
});
// Create a more reliable approach - split and combine
// We'll split the video into segments, crop each one differently, then concatenate
console.log('Creating segmented crop with', smoothedCropTimeline.length, 'segments');
// Create temporary directory for segments - use absolute path
const segmentsDir = path.resolve(path.join(this.options.outputDirectory, 'segments'));
if (!fs.existsSync(segmentsDir)) {
fs.mkdirSync(segmentsDir, { recursive: true });
}
// Calculate aspect ratio for vertical video (9:16)
const aspectRatio = 9 / 16;
const cropWidth = Math.floor(videoInfo.height * aspectRatio);
const cropHeight = videoInfo.height;
// Create segment list
const segments = [];
// Process each segment
for (let i = 0; i < smoothedCropTimeline.length; i++) {
const current = smoothedCropTimeline[i];
const next = i < smoothedCropTimeline.length - 1 ? smoothedCropTimeline[i + 1] : null;
// Calculate segment duration
const segmentDuration = next ? (next.timestamp - current.timestamp) : (videoInfo.duration - current.timestamp);
if (segmentDuration < 0.5)
continue; // Skip very small segments
// Add segment
segments.push({
input: inputPath,
start: current.timestamp,
duration: segmentDuration,
cropX: current.cropOptions.x,
cropY: current.cropOptions.y
});
}
// Create a temporary segmentation script
const segmentationScript = segments.map((segment, index) => {
const outputSegment = path.join(segmentsDir, `segment_${index}.mp4`);
return new Promise((resolve, reject) => {
ffmpeg(segment.input)
.seekInput(segment.start)
.duration(segment.duration)
.videoFilters([
{
filter: 'crop',
options: {
w: cropWidth,
h: cropHeight,
x: segment.cropX,
y: segment.cropY
}
},
{
filter: 'scale',
options: `${this.options.targetWidth}:${this.options.targetHeight}`
}
])
.output(outputSegment)
.on('end', () => {
console.log(`Segment ${index} processed`);
resolve(outputSegment);
})
.on('error', (err) => {
console.error(`Error processing segment ${index}:`, err);
reject(err);
})
.run();
});
});
// Process all segments
try {
const segmentPaths = await Promise.all(segmentationScript);
// Create a list file for concatenation - ensure absolute paths for better compatibility
const listFilePath = path.resolve(path.join(segmentsDir, 'segments.txt'));
const listContent = segmentPaths
.map(p => `file '${path.resolve(p)}'`)
.join('\n');
fs.writeFileSync(listFilePath, listContent);
// Concatenate segments
await new Promise((resolve, reject) => {
ffmpeg()
.input(listFilePath)
.inputOptions(['-f concat', '-safe 0'])
.outputOptions(['-c copy']) // Just copy streams, no re-encoding
.output(outputPath)
.on('start', (commandLine) => {
console.log('Concatenating segments with command: ' + commandLine);
})
.on('end', () => {
console.log('Video concatenation complete');
resolve();
})
.on('error', (err) => {
console.error('Error during concatenation:', err);
reject(err);
})
.run();
});
// Clean up temporary files
console.log('Cleaning up temporary files...');
segmentPaths.forEach(p => {
try {
fs.unlinkSync(p);
}
catch (e) { /* ignore */ }
});
try {
fs.unlinkSync(listFilePath);
}
catch (e) { /* ignore */ }
console.log('Video conversion complete');
return;
}
catch (error) {
console.error('Error processing segments:', error);
throw error;
}
}
catch (error) {
console.error('Error in dynamic cropping:', error);
// Fall back to static cropping if something goes wrong
return this.processVideoForMobile(inputPath, outputPath, undefined, videoInfo);
}
}
// Smooth the crop timeline to avoid sudden jumps
smoothCropTimeline(cropTimelinePoints) {
if (cropTimelinePoints.length <= 2)
return cropTimelinePoints;
console.log('Smoothing crop timeline...');
// Make a deep copy of the timeline
const smoothed = JSON.parse(JSON.stringify(cropTimelinePoints));
// Apply moving average smoothing to x coordinates
// We'll use a window size of 3 (the point itself and one point on each side)
for (let i = 1; i < smoothed.length - 1; i++) {
const prev = smoothed[i - 1].cropOptions;
const current = smoothed[i].cropOptions;
const next = smoothed[i + 1].cropOptions;
// Calculate time-weighted average for smoother transitions
// Points that are closer in time should have more influence
const totalTime = (smoothed[i].timestamp - smoothed[i - 1].timestamp) +
(smoothed[i + 1].timestamp - smoothed[i].timestamp);
const weightPrev = 1 - ((smoothed[i].timestamp - smoothed[i - 1].timestamp) / totalTime);
const weightNext = 1 - ((smoothed[i + 1].timestamp - smoothed[i].timestamp) / totalTime);
// Apply weighted average smoothing
const smoothX = ((prev.x * weightPrev) +
(current.x) +
(next.x * weightNext)) / (1 + weightPrev + weightNext);
// Apply smoothing with a bias toward keeping the original position (70% original, 30% smoothed)
current.x = Math.round(current.x * 0.7 + smoothX * 0.3);
// Do the same for Y coordinate if needed
const smoothY = ((prev.y * weightPrev) +
(current.y) +
(next.y * weightNext)) / (1 + weightPrev + weightNext);
current.y = Math.round(current.y * 0.7 + smoothY * 0.3);
}
// Additional step: remove redundant keyframes that are too close to each other
// This helps reduce the number of segments and makes transitions smoother
const filtered = [smoothed[0]]; // Always keep the first point
for (let i = 1; i < smoothed.length; i++) {
const lastAdded = filtered[filtered.length - 1];
const current = smoothed[i];
// If this point is very similar to the last added point, skip it
// unless it's the last point (we always want to keep the last point)
const isSimilar = Math.abs(current.cropOptions.x - lastAdded.cropOptions.x) < 20 &&
Math.abs(current.cropOptions.y - lastAdded.cropOptions.y) < 10;
const minTimeDiff = 5; // At least 5 seconds between keyframes
const isCloseInTime = (current.timestamp - lastAdded.timestamp) < minTimeDiff;
if ((isSimilar && isCloseInTime) && i < smoothed.length - 1) {
continue; // Skip this keyframe
}
filtered.push(current);
}
console.log(`Reduced from ${smoothed.length} to ${filtered.length} keyframes after smoothing`);
return filtered;
}
// Check if all crop points are effectively the same
allCropPointsEqual(cropTimelinePoints) {
if (cropTimelinePoints.length <= 1)
return true;
const firstCrop = cropTimelinePoints[0].cropOptions;
// Allow a small margin of error (5 pixels) to consider crops as "same"
const margin = 5;
return cropTimelinePoints.every(point => {
const crop = point.cropOptions;
return (Math.abs(crop.x - firstCrop.x) <= margin &&
Math.abs(crop.y - firstCrop.y) <= margin);
});
}
// Create a timeline expression for FFmpeg filters
createTimelineExpression(keypoints) {
if (keypoints.length === 0)
return '0';
if (keypoints.length === 1)
return keypoints[0].value.toString();
// For multiple keypoints, create a piecewise linear expression
// Format: if(lt(T,t1),v0,if(lt(T,t2),v0+(v1-v0)*(T-t0)/(t1-t0),if(...)))
let expr = '';
for (let i = keypoints.length - 1; i >= 0; i--) {
const current = keypoints[i];
if (i === 0) {
// First keypoint (earliest in time)
expr = current.value.toString();
}
else if (i === keypoints.length - 1) {
// Last keypoint (latest in time)
expr = current.value.toString();
}
else {
// Intermediate keypoint - linear interpolation from previous
const next = keypoints[i + 1];
const interpolation = `${current.value}+(${next.value}-${current.value})*(T-${current.time})/(${next.time}-${current.time})`;
expr = `if(lt(T,${next.time}),${interpolation},${expr})`;
}
}
// Add the variable T that represents time in the video
expr = `${expr}`;
return expr;
}
// Legacy method for static cropping - keeping this for fallback
processVideoForMobile(inputPath, outputPath, faces, videoInfo) {
return new Promise((resolve, reject) => {
// Determine crop parameters
const cropOptions = this.calculateVerticalCrop(inputPath, faces, videoInfo);
console.log('Applying static crop with options:', cropOptions);
// Use ffmpeg directly since we're using require
ffmpeg(inputPath)
.videoFilters([
// Crop video
{
filter: 'crop',
options: {
w: cropOptions.width,
h: cropOptions.height,
x: cropOptions.x,
y: cropOptions.y
}
},
// Scale to mobile-friendly vertical resolution
{
filter: 'scale',
options: `${this.options.targetWidth}:${this.options.targetHeight}`
}
])
.output(outputPath)
.on('start', (commandLine) => {
console.log('Spawned FFmpeg with command: ' + commandLine);
})
.on('end', () => {
console.log('Video conversion complete');
resolve();
})
.on('error', (err) => {
console.error('Error during video conversion:', err);
reject(err);
})
.run();
});
}
calculateVerticalCrop(inputPath, faces, videoInfo) {
// Use video dimensions if available
const videoWidth = videoInfo ? videoInfo.width : this.options.targetWidth;
const videoHeight = videoInfo ? videoInfo.height : this.options.targetHeight;
// Calculate aspect ratio for vertical video (9:16)
const aspectRatio = 9 / 16;
// Calculate crop width and height to maintain aspect ratio
let cropWidth, cropHeight;
if (videoWidth / videoHeight > aspectRatio) {
// If the video is wider than 9:16, crop the width
cropHeight = videoHeight;
cropWidth = Math.floor(cropHeight * aspectRatio);
}
else {
// If the video is taller than 9:16, crop the height
cropWidth = videoWidth;
cropHeight = Math.floor(cropWidth / aspectRatio);
}
// Center crop by default
let x = Math.floor((videoWidth - cropWidth) / 2);
let y = Math.floor((videoHeight - cropHeight) / 2);
// If faces are detected, center crop around the first face or the average of all faces
if (faces && faces.length > 0) {
if (faces.length === 1) {
// Center on the single face, but ensure the crop rect contains the face
const face = faces[0];
const faceCenter = {
x: face.x + face.width / 2,
y: face.y + face.height / 2
};
// Calculate potential crop rect centered on face
let potentialX = Math.max(0, Math.min(videoWidth - cropWidth, faceCenter.x - cropWidth / 2));
let potentialY = Math.max(0, Math.min(videoHeight - cropHeight, faceCenter.y - cropHeight / 2));
// Check if this crop would include the entire face
const faceLeft = face.x;
const faceRight = face.x + face.width;
const faceTop = face.y;
const faceBottom = face.y + face.height;
// Adjust to ensure face is fully visible if possible
if (faceLeft < potentialX) {
potentialX = Math.max(0, faceLeft);
}
if (faceRight > potentialX + cropWidth) {
potentialX = Math.min(videoWidth - cropWidth, faceRight - cropWidth);
}
// Apply the adjusted coordinates
x = potentialX;
y = potentialY;
}
else {
// With multiple faces, find the central point between them
// Weight larger/more central faces more heavily
let totalWeight = 0;
let weightedX = 0;
let weightedY = 0;
faces.forEach(face => {
// Weight based on face size (larger faces get higher weight)
const faceSize = face.width * face.height;
// Additional weight for faces in the center
const centrality = 1 - Math.min(1, (Math.abs((face.x + face.width / 2) - videoWidth / 2) / videoWidth) * 2 +
(Math.abs((face.y + face.height / 2) - videoHeight / 2) / videoHeight) * 2) / 2;
const weight = faceSize * centrality;
totalWeight += weight;
// Accumulate weighted positions
weightedX += (face.x + face.width / 2) * weight;
weightedY += (face.y + face.height / 2) * weight;
});
if (totalWeight > 0) {
// Calculate weighted center
const centerX = weightedX / totalWeight;
const centerY = weightedY / totalWeight;
// Center crop on the weighted center
x = Math.max(0, Math.min(videoWidth - cropWidth, centerX - cropWidth / 2));
y = Math.max(0, Math.min(videoHeight - cropHeight, centerY - cropHeight / 2));
}
}
}
// Safety check to ensure crop remains within video boundaries
x = Math.max(0, Math.min(x, videoWidth - cropWidth));
y = Math.max(0, Math.min(y, videoHeight - cropHeight));
return {
width: cropWidth,
height: cropHeight,
x,
y
};
}
}
exports.MobileVideoConverter = MobileVideoConverter;
exports.default = MobileVideoConverter;