UNPKG

@aj-archipelago/cortex

Version:

Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.

392 lines (341 loc) 17.2 kB
// AzureVideoTranslatePlugin.js
import ModelPlugin from "./modelPlugin.js";
import logger from "../../lib/logger.js";
import { publishRequestProgress } from "../../lib/redisSubscription.js";
import crypto from 'crypto';
import axios from 'axios';
import {config} from "../../config.js";

// turn off any caching because we're polling the operation status
axios.defaults.cache = false;

// Bitrate (bits per second) assumed when guessing a video's duration from its size.
const TYPICAL_BITRATE = 2.5 * 1024 * 1024; // 2.5 Mbps

// Duration (seconds) assumed when the video size is unknown.
const DEFAULT_DURATION_SECONDS = 60;

// Delay between two status polls and between two upload heartbeats.
const POLL_INTERVAL_MS = 5000;

/**
 * Tell whether a URL points at an Azure Blob Storage host.
 *
 * The hostname is inspected (not the whole string) so that a foreign URL that
 * merely mentions ".blob.core.windows.net" in its path or query is not
 * mistaken for an Azure URL.
 *
 * @param {string} url - URL to inspect.
 * @returns {boolean} true when the hostname ends with ".blob.core.windows.net";
 *   false otherwise, including when the URL cannot be parsed.
 */
function isAzureBlobUrl(url) {
    try {
        return new URL(url).hostname.endsWith('.blob.core.windows.net');
    } catch {
        return false;
    }
}

/**
 * Estimate how long (in seconds) a translation will take.
 *
 * @param {number|null} contentLength - Video size in bytes, or null/NaN when unknown.
 * @param {number|null} processingRate - Historical processing rate in bytes per second, if any.
 * @returns {number} A finite, strictly positive number of seconds.
 */
function estimateTotalSeconds(contentLength, processingRate) {
    const hasLength = Number.isFinite(contentLength) && contentLength > 0;

    if (hasLength && Number.isFinite(processingRate) && processingRate > 0) {
        return contentLength / processingRate;
    }

    // First run (or unknown size): estimate based on 2x the guessed video duration.
    const durationSeconds = hasLength
        ? (contentLength * 8) / TYPICAL_BITRATE
        : DEFAULT_DURATION_SECONDS;
    return 2 * durationSeconds;
}

/**
 * Extract the most specific error message available from an axios error.
 *
 * @param {Error} error - Error thrown by axios or by local code.
 * @returns {string} The service's inner error message when present, else error.message.
 */
function describeError(error) {
    return error.response?.data?.error?.innererror?.message || error.message;
}

/**
 * Cortex model plugin that drives the Azure video translation REST API:
 * it creates a translation, creates an iteration of it, polls until the
 * iteration finishes and returns the resulting file URLs, publishing
 * progress updates along the way.
 */
class AzureVideoTranslatePlugin extends ModelPlugin {
    static lastProcessingRate = null; // bytes per second
    static processingRates = []; // Array to store historical processing rates
    static maxHistorySize = 10; // Maximum number of rates to store

    /**
     * @param {object} pathway - Forwarded unchanged to ModelPlugin.
     * @param {object} model - Forwarded unchanged to ModelPlugin.
     */
    constructor(pathway, model) {
        super(pathway, model);
        this.subscriptionKey = config.get("azureVideoTranslationApiKey");
        this.apiVersion = "2024-05-20-preview";
        this.baseUrl = "";
        this.startTime = null;
        this.videoContentLength = null;
    }

    /**
     * Issue a HEAD request against the video and derive basic facts from the
     * response headers.
     *
     * @param {string} videoUrl - URL of the source video.
     * @returns {Promise<{isAccessible: boolean, contentLength: (number|null), durationSeconds: number, isAzureUrl: boolean}>}
     *   contentLength is null when the server sends no usable content-length header.
     * @throws {Error} "Failed to access video: ..." when the request fails.
     */
    async verifyVideoAccess(videoUrl) {
        try {
            const response = await axios.head(videoUrl);
            const contentType = response.headers['content-type'];

            // A missing/garbled header parses to NaN; normalise it so NaN never
            // leaks into the progress estimates or the shared rate history.
            const parsedLength = Number.parseInt(response.headers['content-length'], 10);
            const contentLength = Number.isFinite(parsedLength) ? parsedLength : null;

            if (contentType && !contentType.includes('video/mp4')) {
                logger.warn(`Warning: Video might not be in MP4 format. Content-Type: ${contentType}`);
            }

            const durationSeconds = contentLength === null
                ? 0
                : Math.round((contentLength * 8) / TYPICAL_BITRATE);

            return {
                isAccessible: true,
                contentLength,
                durationSeconds: durationSeconds || DEFAULT_DURATION_SECONDS,
                isAzureUrl: isAzureBlobUrl(videoUrl)
            };
        } catch (error) {
            throw new Error(`Failed to access video: ${error.message}`, { cause: error });
        }
    }

    /**
     * Ask the configured file handler ("whisperMediaApiUrl") to fetch the video
     * and return the URL it reports back. A heartbeat progress message is
     * published every few seconds while the request is in flight.
     *
     * @param {string} videoUrl - URL of the source video.
     * @returns {Promise<string>} URL returned by the file handler.
     * @throws {Error} "Failed to upload video to file handler: ..." on any failure.
     */
    async uploadToFileHandler(videoUrl) {
        try {
            // Get the file handler URL from config
            const fileHandlerUrl = config.get("whisperMediaApiUrl");
            if (!fileHandlerUrl) {
                throw new Error("File handler URL is not configured");
            }

            // Start heartbeat progress updates
            const heartbeat = setInterval(() => {
                publishRequestProgress({
                    requestId: this.requestId,
                    progress: 0,
                    info: 'Uploading and processing video...'
                });
            }, POLL_INTERVAL_MS);

            try {
                // Start the fetch request
                const response = await axios.get(fileHandlerUrl, {
                    params: {
                        requestId: this.requestId,
                        fetch: videoUrl
                    }
                });

                if (!response.data?.url) {
                    throw new Error("File handler did not return a valid URL");
                }

                return response.data.url;
            } finally {
                // Always clear the heartbeat interval
                clearInterval(heartbeat);
            }
        } catch (error) {
            logger.error(`Failed to upload video to file handler: ${error.message}`);
            if (error.response?.data) {
                logger.error(`Response data: ${JSON.stringify(error.response.data)}`);
            }
            throw new Error(`Failed to upload video to file handler: ${error.message}`, { cause: error });
        }
    }

    /**
     * Create (PUT) a translation resource.
     *
     * @param {object} params
     * @param {string} params.videoUrl - URL of the video to translate.
     * @param {string} params.sourceLanguage - Sent as input.sourceLocale.
     * @param {string} params.targetLanguage - Sent as input.targetLocale.
     * @param {string} params.voiceKind - Sent as input.voiceKind.
     * @param {string} params.translationId - Identifier of the translation resource.
     * @returns {Promise<{translation: object, operationUrl: string}>} The response
     *   body and the value of the "operation-location" response header.
     * @throws {Error} "Failed to create translation: ..." on request failure or
     *   when the response carries no operation-location header.
     */
    async createTranslation(params) {
        const { videoUrl, sourceLanguage, targetLanguage, voiceKind, translationId } = params;

        const translation = {
            id: translationId,
            displayName: `${translationId}.mp4`,
            description: `Translate video from ${sourceLanguage} to ${targetLanguage}`,
            input: {
                sourceLocale: sourceLanguage,
                targetLocale: targetLanguage,
                voiceKind: voiceKind,
                videoFileUrl: videoUrl
            }
        };

        const url = `${this.baseUrl}/translations/${translationId}?api-version=${this.apiVersion}`;
        logger.debug(`Creating translation: ${url}`);

        try {
            const response = await axios.put(url, translation, {
                headers: {
                    'Content-Type': 'application/json',
                    'Ocp-Apim-Subscription-Key': this.subscriptionKey,
                }
            });

            const operationUrl = response.headers['operation-location'];
            if (!operationUrl) {
                // Without it there is nothing to poll; fail here with a clear message.
                throw new Error("Response did not include an operation-location header");
            }
            return { translation: response.data, operationUrl };
        } catch (error) {
            throw new Error(`Failed to create translation: ${describeError(error)}`, { cause: error });
        }
    }

    /**
     * Fetch the current state of a translation resource.
     *
     * @param {string} translationId - Identifier of the translation resource.
     * @returns {Promise<object>} The response body.
     * @throws {Error} "Failed to get translation status: ..." on request failure.
     */
    async getTranslationStatus(translationId) {
        const url = `${this.baseUrl}/translations/${translationId}?api-version=${this.apiVersion}`;
        try {
            const response = await axios.get(url, {
                headers: {
                    'Ocp-Apim-Subscription-Key': this.subscriptionKey,
                }
            });
            return response.data;
        } catch (error) {
            throw new Error(`Failed to get translation status: ${error.message}`, { cause: error });
        }
    }

    /**
     * Fetch the current state of one iteration of a translation.
     *
     * @param {string} translationId - Identifier of the translation resource.
     * @param {string} iterationId - Identifier of the iteration.
     * @returns {Promise<object>} The response body.
     * @throws {Error} "Failed to get iteration status: ..." on request failure.
     */
    async getIterationStatus(translationId, iterationId) {
        const url = `${this.baseUrl}/translations/${translationId}/iterations/${iterationId}?api-version=${this.apiVersion}`;
        try {
            const response = await axios.get(url, {
                headers: {
                    'Ocp-Apim-Subscription-Key': this.subscriptionKey,
                }
            });
            return response.data;
        } catch (error) {
            throw new Error(`Failed to get iteration status: ${describeError(error)}`, { cause: error });
        }
    }

    /**
     * Fetch the current state of a long-running operation.
     *
     * @param {string} operationUrl - URL taken from an "operation-location" header.
     * @returns {Promise<object>} The response body.
     * @throws {Error} "Failed to poll operation: ..." on request failure.
     */
    async pollOperation(operationUrl) {
        try {
            const response = await axios.get(operationUrl, {
                headers: {
                    'Ocp-Apim-Subscription-Key': this.subscriptionKey,
                }
            });
            return response.data;
        } catch (error) {
            throw new Error(`Failed to poll operation: ${describeError(error)}`, { cause: error });
        }
    }

    /**
     * Poll an operation (or an iteration) every few seconds until it reaches a
     * terminal status, publishing a progress update after every poll.
     *
     * @param {string|{translationId: string, iterationId: string}} operationUrlOrConfig -
     *   An operation URL to poll, or the ids of an iteration whose status is polled instead.
     * @param {string} [entityType='operation'] - 'translation' and 'iteration'
     *   select the progress messages; the value also prefixes log and error text.
     * @returns {Promise<object>} The last status object, whose status is 'Succeeded'.
     * @throws {Error} When the status becomes 'Failed' or 'Canceled', or a poll request fails.
     */
    async monitorOperation(operationUrlOrConfig, entityType = 'operation') {
        // Always finite and > 0, so the progress ratio below can never be NaN.
        const estimatedTotalTime = estimateTotalSeconds(
            this.videoContentLength,
            AzureVideoTranslatePlugin.lastProcessingRate
        );

        // eslint-disable-next-line no-constant-condition
        while (true) {
            let status;
            if (typeof operationUrlOrConfig === 'string') {
                status = await this.pollOperation(operationUrlOrConfig);
            } else {
                const { translationId, iterationId } = operationUrlOrConfig;
                status = await this.getIterationStatus(translationId, iterationId);
            }

            logger.debug(`${entityType} status: ${JSON.stringify(status, null, 2)}`);

            let progress = 0;
            let progressMessage = '';

            switch (entityType) {
                case 'translation':
                    progressMessage = 'Getting ready to translate video...';
                    break;
                case 'iteration':
                    if (status.status === 'NotStarted') {
                        progressMessage = 'Waiting for translation to start...';
                    } else if (status.status === 'Running') {
                        progressMessage = 'Translating video...';
                        if (this.startTime) {
                            // Calculate progress based on elapsed time
                            const elapsedSeconds = (Date.now() - this.startTime) / 1000;
                            const estimatedProgress = Math.min(0.95, elapsedSeconds / estimatedTotalTime);
                            const remainingSeconds = Math.max(0, estimatedTotalTime - elapsedSeconds);

                            if (remainingSeconds > 0) {
                                if (remainingSeconds < 60) {
                                    const roundedSeconds = Math.ceil(remainingSeconds);
                                    progressMessage = `Translating video... ${roundedSeconds} second${roundedSeconds !== 1 ? 's' : ''} remaining`;
                                } else {
                                    const remainingMinutes = Math.ceil(remainingSeconds / 60);
                                    progressMessage = `Translating video... ${remainingMinutes} minute${remainingMinutes !== 1 ? 's' : ''} remaining`;
                                }
                            }

                            // Prefer the service-reported percentage when it is present.
                            progress = status.percentComplete ? status.percentComplete / 100 : estimatedProgress;
                        } else {
                            // First time "Running" is observed: start the clock.
                            this.startTime = Date.now();
                        }
                    } else if (status.status === 'Succeeded') {
                        progressMessage = 'Video translation complete.';
                    } else if (status.status === 'Failed') {
                        progressMessage = 'Video translation failed.';
                    } else if (status.status === 'Canceled') {
                        progressMessage = 'Video translation canceled.';
                    }
                    break;
            }

            // Publish progress updates
            publishRequestProgress({
                requestId: this.requestId,
                progress,
                info: progressMessage
            });

            if (status.status === 'Succeeded') {
                return status;
            } else if (status.status === 'Failed') {
                throw new Error(`${entityType} failed: ${status.error?.message || 'Unknown error'}`);
            } else if (status.status === 'Canceled') {
                // NOTE(review): assumes the service reports "Canceled" as a terminal
                // status; without this branch such an operation would be polled forever.
                throw new Error(`${entityType} canceled: ${status.error?.message || 'Unknown error'}`);
            }

            await new Promise(resolve => setTimeout(resolve, POLL_INTERVAL_MS));
        }
    }

    /**
     * Collect the output file URLs of a finished iteration.
     *
     * @param {string} translationId - Identifier of the translation resource.
     * @param {string} iterationId - Identifier of the iteration.
     * @returns {Promise<object|null>} The source subtitle URL plus, keyed by the
     *   translation's target locale, the translated video and subtitle URLs;
     *   null when the iteration has no "result".
     */
    async getTranslationOutput(translationId, iterationId) {
        const iteration = await this.getIterationStatus(translationId, iterationId);
        const translation = await this.getTranslationStatus(translationId);

        if (!iteration.result) {
            return null;
        }

        const targetLocale = translation.input.targetLocale;
        return {
            outputVideoSubtitleWebVttFileUrl: iteration.result.sourceLocaleSubtitleWebvttFileUrl,
            targetLocales: {
                [targetLocale]: {
                    outputVideoFileUrl: iteration.result.translatedVideoFileUrl,
                    outputVideoSubtitleWebVttFileUrl: iteration.result.targetLocaleSubtitleWebvttFileUrl
                }
            }
        };
    }

    /**
     * Strip framework-level keys and empty values from the pathway parameters.
     *
     * @param {*} _ - Unused.
     * @param {object} parameters - Raw parameters.
     * @param {*} __ - Unused.
     * @returns {object} Copy of parameters without the excluded keys and without
     *   entries whose value is '' or undefined.
     */
    getRequestParameters(_, parameters, __) {
        const excludedParameters = [
            'text', 'parameters', 'prompt', 'promptParameters', 'previousResult', 'stream', 'memoryContext'
        ];

        return Object.fromEntries(
            Object.entries(parameters).filter(([key, value]) =>
                !excludedParameters.includes(key) && value !== '' && typeof value !== 'undefined'
            )
        );
    }

    /**
     * Run a full video translation: verify the source, relay it through the
     * file handler when it is not hosted on Azure Blob Storage, create the
     * translation and one iteration, wait for completion and return the output.
     *
     * @param {string} text - Forwarded to getRequestParameters (unused there).
     * @param {object} parameters - Reads sourcevideooraudiofilepath, sourcelocale,
     *   targetlocale, voicekind, withoutsubtitleintranslatedvideofile, speakercount.
     * @param {*} prompt - Forwarded to getRequestParameters (unused there).
     * @param {{requestId: string, url: string}} cortexRequest - Supplies the
     *   request id used for progress updates and the API base URL.
     * @returns {Promise<string>} JSON string of getTranslationOutput's result.
     * @throws {Error} When the subscription key is missing or any step fails.
     */
    async execute(text, parameters, prompt, cortexRequest) {
        if (!this.subscriptionKey) {
            throw new Error("Azure Video Translation subscription key is not set");
        }

        this.requestId = cortexRequest.requestId;
        this.baseUrl = cortexRequest.url;
        // Never time this run against a start time left over from a previous one.
        this.startTime = null;

        const requestParameters = this.getRequestParameters(text, parameters, prompt);

        try {
            const translationId = `cortex-translation-${this.requestId}`;
            let videoUrl = requestParameters.sourcevideooraudiofilepath;
            const sourceLanguage = requestParameters.sourcelocale;
            const targetLanguage = requestParameters.targetlocale;
            const voiceKind = requestParameters.voicekind || 'PlatformVoice';
            const embedSubtitles = requestParameters.withoutsubtitleintranslatedvideofile === "false";
            const speakerCount = Number.parseInt(requestParameters.speakercount, 10) || 0;

            // Verify video access and get duration
            const videoInfo = await this.verifyVideoAccess(videoUrl);
            this.videoContentLength = videoInfo.contentLength;
            logger.debug(`Video info: ${JSON.stringify(videoInfo, null, 2)}`);

            // If the video is not from Azure storage, upload it to file handler
            if (!videoInfo.isAzureUrl) {
                logger.debug('Video is not from Azure storage, uploading to file handler...');
                videoUrl = await this.uploadToFileHandler(videoUrl);
                logger.debug(`Video uploaded to file handler: ${videoUrl}`);
            }

            // Create translation
            const { operationUrl } = await this.createTranslation({
                videoUrl, sourceLanguage, targetLanguage, voiceKind, translationId
            });

            logger.debug(`Starting translation monitoring with operation URL: ${operationUrl}`);

            // Monitor translation creation
            const operationStatus = await this.monitorOperation(operationUrl, 'translation');
            logger.debug(`Translation operation completed with status: ${JSON.stringify(operationStatus, null, 2)}`);

            const updatedTranslation = await this.getTranslationStatus(translationId);
            logger.debug(`Translation status after operation: ${JSON.stringify(updatedTranslation, null, 2)}`);

            // Create iteration
            const iteration = {
                id: crypto.randomUUID(),
                displayName: translationId,
                input: {
                    subtitleMaxCharCountPerSegment: 42,
                    exportSubtitleInVideo: embedSubtitles,
                    ...(speakerCount > 0 && { speakerCount })
                }
            };

            logger.debug(`Creating iteration: ${JSON.stringify(iteration, null, 2)}`);

            const iterationUrl = `${this.baseUrl}/translations/${translationId}/iterations/${iteration.id}?api-version=${this.apiVersion}`;

            try {
                const iterationResponse = await axios.put(iterationUrl, iteration, {
                    headers: {
                        'Content-Type': 'application/json',
                        'Ocp-Apim-Subscription-Key': this.subscriptionKey,
                        'Cache-Control': 'no-cache',
                        'Pragma': 'no-cache'
                    }
                });

                const iterationOperationUrl = iterationResponse.headers['operation-location'];
                if (!iterationOperationUrl) {
                    // Without it there is nothing to poll; fail here with a clear message.
                    throw new Error("Response did not include an operation-location header");
                }
                await this.monitorOperation(iterationOperationUrl, 'iteration');

                // Update processing rate for future estimates. Skipped when the
                // "Running" state was never observed (startTime still null) or the
                // size is unknown, because the resulting rate would be meaningless.
                if (this.startTime !== null && this.videoContentLength > 0) {
                    const totalSeconds = (Date.now() - this.startTime) / 1000;
                    if (totalSeconds > 0) {
                        const newRate = this.videoContentLength / totalSeconds;
                        AzureVideoTranslatePlugin.updateProcessingRate(newRate);
                        logger.debug(`Updated processing rate: ${AzureVideoTranslatePlugin.lastProcessingRate} bytes/second (from ${newRate} bytes/second)`);
                    }
                }

                const output = await this.getTranslationOutput(translationId, iteration.id);
                return JSON.stringify(output);
            } catch (error) {
                throw new Error(`Failed to create iteration: ${describeError(error)}`, { cause: error });
            }
        } catch (error) {
            logger.error(`Error in video translation: ${error.message}`);
            throw error;
        }
    }

    /**
     * Record a measured processing rate and refresh lastProcessingRate with a
     * weighted average of the stored history (newer entries weigh more).
     * Non-finite or non-positive rates are ignored so that one bad measurement
     * cannot corrupt the estimates of later requests.
     *
     * @param {number} newRate - Measured rate in bytes per second.
     */
    static updateProcessingRate(newRate) {
        if (!Number.isFinite(newRate) || newRate <= 0) {
            return;
        }

        const rates = AzureVideoTranslatePlugin.processingRates;

        // Add new rate to history
        rates.push(newRate);

        // Keep only the last maxHistorySize entries
        if (rates.length > AzureVideoTranslatePlugin.maxHistorySize) {
            rates.shift();
        }

        // Calculate weighted average - more recent measurements have higher weight
        let weightedSum = 0;
        let weightSum = 0;
        rates.forEach((rate, index) => {
            const weight = index + 1; // Weight increases with recency
            weightedSum += rate * weight;
            weightSum += weight;
        });

        AzureVideoTranslatePlugin.lastProcessingRate = weightedSum / weightSum;
    }
}

export default AzureVideoTranslatePlugin;