@aj-archipelago/cortex

Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.
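To give a sense of the interface, here is a minimal sketch of calling a Cortex deployment over GraphQL. The endpoint URL, the pathway name (summary), and the response shape are assumptions for illustration only; the actual schema depends on the pathways configured for your deployment.

// Hypothetical client sketch (not from the package docs): POST a GraphQL
// query to a Cortex server. Endpoint, pathway, and fields are assumptions.
const response = await fetch('http://localhost:4000/graphql', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
        query: `query Summarize($text: String) {
            summary(text: $text) {
                result
            }
        }`,
        variables: { text: 'Some long article text...' },
    }),
});
const { data } = await response.json();
console.log(data.summary.result);

The module below is one of the package's utility files: it downloads a media file if given a URL, probes it with ffmpeg, and splits it into MP3 chunks for downstream transcription.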

269 lines (238 loc) 9.41 kB
import fs from 'fs';
import http from 'http';
import https from 'https';
import os from 'os';
import path from 'path';
import { pipeline } from 'stream/promises';
import { promisify } from 'util';
import axios from 'axios';
import ffmpeg from 'fluent-ffmpeg';
import { v4 as uuidv4 } from 'uuid';
import { ensureEncoded } from './helper.js';

const ffmpegProbe = promisify(ffmpeg.ffprobe);

// Temp file management
const CLEANUP_INTERVAL_MS = 5 * 60 * 1000; // 5 minutes
const tempDirectories = new Map(); // dir -> { createdAt, requestId }

// Temp directory cleanup
async function cleanupTempDirectories() {
    for (const [dir, info] of tempDirectories) {
        try {
            // Cleanup directories older than 1 hour
            if (Date.now() - info.createdAt > 60 * 60 * 1000) {
                await fs.promises.rm(dir, { recursive: true, force: true });
                tempDirectories.delete(dir);
                console.log(`Cleaned up old temp directory: ${dir}`);
            }
        } catch (err) {
            // Directory might be gone
            tempDirectories.delete(dir);
        }
    }
}

// Setup periodic cleanup
setInterval(async () => {
    try {
        await cleanupTempDirectories();
    } catch (err) {
        console.error('Error during periodic cleanup:', err);
    }
}, CLEANUP_INTERVAL_MS);

// Process a single chunk with streaming and progress tracking
async function processChunk(inputPath, outputFileName, start, duration) {
    return new Promise((resolve, reject) => {
        const command = ffmpeg(inputPath)
            .seekInput(start)
            .duration(duration)
            .format('mp3')
            .audioCodec('libmp3lame')
            .audioBitrate(128)
            .on('start', () => {
                console.log(`Processing chunk: ${start}s -> ${start + duration}s`);
            })
            .on('progress', (progress) => {
                if (progress.percent) {
                    console.log(`Chunk progress: ${progress.percent}%`);
                }
            })
            .on('error', (err, stdout, stderr) => {
                console.error('FFmpeg error:', err.message);
                if (stdout) console.log('FFmpeg stdout:', stdout);
                if (stderr) console.error('FFmpeg stderr:', stderr);
                reject(err);
            })
            .on('end', () => {
                console.log(`Chunk complete: ${outputFileName}`);
                resolve(outputFileName);
            });

        // Use pipeline for better error handling and backpressure
        pipeline(
            command,
            fs.createWriteStream(outputFileName, {
                highWaterMark: 4 * 1024 * 1024, // 4MB chunks
            }),
        ).catch(reject);
    });
}

// Build a unique temp directory path for this request's chunks
const generateUniqueFolderName = () => {
    const uniqueFolderName = uuidv4();
    const tempFolderPath = os.tmpdir();
    return path.join(tempFolderPath, uniqueFolderName);
};

async function downloadFile(url, outputPath) {
    try {
        const agent = {
            http: new http.Agent({
                keepAlive: true,
                maxSockets: 10,
                maxFreeSockets: 10,
                timeout: 60000,
            }),
            https: new https.Agent({
                keepAlive: true,
                maxSockets: 10,
                maxFreeSockets: 10,
                timeout: 60000,
            }),
        };

        // Use the original URL without any decoding
        const response = await axios.get(url, {
            responseType: 'stream',
            timeout: 30000,
            maxContentLength: Infinity,
            decompress: true,
            httpAgent: agent.http,
            httpsAgent: agent.https,
            maxRedirects: 5,
            validateStatus: (status) => status >= 200 && status < 300,
        });

        // Use pipeline for better error handling and memory management
        await pipeline(
            response.data,
            fs.createWriteStream(outputPath, {
                highWaterMark: 4 * 1024 * 1024, // 4MB chunks
            }),
        );

        if (!fs.existsSync(outputPath) || fs.statSync(outputPath).size === 0) {
            throw new Error('Download failed or file is empty');
        }
    } catch (error) {
        // Remove any partial download before rethrowing
        if (fs.existsSync(outputPath)) {
            fs.unlinkSync(outputPath);
        }
        throw error;
    }
}

async function splitMediaFile(
    inputPath,
    chunkDurationInSeconds = 500,
    requestId = uuidv4(),
) {
    let tempPath = null;
    let uniqueOutputPath = null;
    let inputStream = null;
    try {
        uniqueOutputPath = generateUniqueFolderName();
        fs.mkdirSync(uniqueOutputPath, { recursive: true });
        tempDirectories.set(uniqueOutputPath, {
            createdAt: Date.now(),
            requestId,
        });

        // Handle URL downloads with streaming
        const isUrl = /^(https?|ftp):\/\/[^\s/$.?#].[^\s]*$/i.test(inputPath);
        if (isUrl) {
            const urlObj = new URL(ensureEncoded(inputPath));
            const originalFileName =
                path.basename(urlObj.pathname) || 'downloaded_file';
            tempPath = path.join(uniqueOutputPath, originalFileName);
            console.log('Downloading file to:', tempPath);
            await downloadFile(inputPath, tempPath);
            inputPath = tempPath;
        }

        inputPath = path.resolve(inputPath);
        if (!fs.existsSync(inputPath)) {
            throw new Error(`Input file not found: ${inputPath}`);
        }

        // Use a larger chunk size for better throughput while still managing memory
        inputStream = fs.createReadStream(inputPath, {
            highWaterMark: 4 * 1024 * 1024, // 4MB chunks
            autoClose: true,
        });

        console.log('Probing file:', inputPath);
        const metadata = await ffmpegProbe(inputPath);
        if (!metadata?.format?.duration) {
            throw new Error('Invalid media file or unable to determine duration');
        }

        const duration = metadata.format.duration;
        // The 1s subtraction avoids spilling a sub-second remainder into an extra chunk
        const numChunks = Math.ceil((duration - 1) / chunkDurationInSeconds);
        console.log(
            `Processing ${numChunks} chunks of ${chunkDurationInSeconds} seconds each`,
        );

        const chunkResults = new Array(numChunks); // Pre-allocate array to maintain order
        const chunkOffsets = new Array(numChunks); // Pre-allocate offsets array

        // Process chunks in parallel with a concurrency limit
        const CONCURRENT_CHUNKS = Math.min(3, os.cpus().length); // Use CPU count to determine concurrency
        for (let i = 0; i < numChunks; i += CONCURRENT_CHUNKS) {
            const chunkBatch = [];
            for (let j = 0; j < CONCURRENT_CHUNKS && i + j < numChunks; j++) {
                const chunkIndex = i + j;
                const outputFileName = path.join(
                    uniqueOutputPath,
                    `chunk-${chunkIndex + 1}-${path.parse(inputPath).name}.mp3`,
                );
                const offset = chunkIndex * chunkDurationInSeconds;
                chunkBatch.push(
                    processChunk(
                        inputPath,
                        outputFileName,
                        offset,
                        chunkDurationInSeconds,
                    )
                        .then((result) => {
                            chunkResults[chunkIndex] = result; // Store in correct position
                            chunkOffsets[chunkIndex] = offset; // Store offset in correct position
                            console.log(`Completed chunk ${chunkIndex + 1}/${numChunks}`);
                            return result;
                        })
                        .catch((error) => {
                            console.error(
                                `Failed to process chunk ${chunkIndex + 1}:`,
                                error,
                            );
                            return null;
                        }),
                );
            }
            // Wait for the current batch to complete before starting the next
            await Promise.all(chunkBatch);
        }

        // Filter out any failed chunks
        const validChunks = chunkResults.filter(Boolean);
        const validOffsets = chunkOffsets.filter((_, index) => chunkResults[index]);
        if (validChunks.length === 0) {
            throw new Error('No chunks were successfully processed');
        }

        return {
            chunkPromises: validChunks, // Note: resolved output file paths, not pending promises
            chunkOffsets: validOffsets,
            uniqueOutputPath,
        };
    } catch (err) {
        if (uniqueOutputPath && fs.existsSync(uniqueOutputPath)) {
            try {
                fs.rmSync(uniqueOutputPath, { recursive: true, force: true });
                tempDirectories.delete(uniqueOutputPath);
            } catch (cleanupErr) {
                console.error('Error during cleanup:', cleanupErr);
            }
        }
        console.error('Error in splitMediaFile:', err);
        throw new Error(`Error processing media file: ${err.message}`);
    } finally {
        if (inputStream) {
            try {
                inputStream.destroy();
            } catch (err) {
                console.error('Error closing input stream:', err);
            }
        }
    }
}

export { splitMediaFile, downloadFile };
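
For context, a minimal usage sketch of the exported functions follows. The import path and input URL are assumptions; what is grounded in the module is the return shape of splitMediaFile (chunk file paths under the key chunkPromises, their start offsets, and the temp directory) and the fact that the caller should remove the temp directory when done, since the periodic cleanup only reclaims directories older than an hour.

// Hypothetical usage sketch; './fileChunker.js' is an assumed import path.
import fs from 'fs';
import { splitMediaFile } from './fileChunker.js';

// Split a remote recording into 500-second MP3 chunks
const { chunkPromises: chunkPaths, chunkOffsets, uniqueOutputPath } =
    await splitMediaFile('https://example.com/recording.mp3', 500);

chunkPaths.forEach((chunkPath, i) => {
    console.log(`Chunk ${i + 1} starts at ${chunkOffsets[i]}s: ${chunkPath}`);
});

// Caller removes the temp directory once the chunks have been consumed;
// the module's periodic cleanup is only a backstop for stale directories.
await fs.promises.rm(uniqueOutputPath, { recursive: true, force: true });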