@juspay/neurolink
Version:
Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications…
268 lines • 10.7 kB
JavaScript
/**
* Replicate Avatar Handler (MuseTalk default)
*
* Routes avatar / lip-sync generation through Replicate's universal
* prediction lifecycle. Default model is MuseTalk; other lip-sync models
* (SadTalker, Wav2Lip, etc.) can be selected via `options.model`.
*
* @module avatar/providers/ReplicateAvatar
* @see https://replicate.com/douwantech/musetalk
*/
import { ErrorCategory, ErrorSeverity } from "../../constants/enums.js";
import { AVATAR_ERROR_CODES, AvatarError, } from "../../utils/avatarProcessor.js";
import { logger } from "../../utils/logger.js";
import { getReplicateAuth } from "../../adapters/replicate/auth.js";
import { downloadPredictionOutput, predict, } from "../../adapters/replicate/predictionLifecycle.js";
import { MAX_AUDIO_BYTES, MAX_IMAGE_BYTES, MAX_VIDEO_BYTES, readBoundedBuffer, } from "../../utils/sizeGuard.js";
import { assertSafeUrl } from "../../utils/ssrfGuard.js";
// Replicate model reference that ReplicateAvatar.generate() falls back to when
// the caller does not pass `options.model`. The part after ":" looks like a
// pinned model-version hash — TODO confirm against Replicate before bumping.
const DEFAULT_MODEL = "douwantech/musetalk:5501004e78525e4bbd9fa20d1e75ad51fddce5a274bec07b9b16d685e34eeaf8";
/**
 * Replicate Avatar Handler.
 *
 * MuseTalk requires both `image` and `audio` inputs — `text`-only is not
 * supported here (use D-ID for that, or chain TTS + this handler).
 *
 * Both inputs may be supplied as a Buffer or as an HTTPS URL. Local file
 * paths are rejected.
 */
export class ReplicateAvatar {
    // Magic-byte signatures used by the content sniffers below.
    static #RIFF = [0x52, 0x49, 0x46, 0x46]; // "RIFF"
    static #WEBP = [0x57, 0x45, 0x42, 0x50]; // "WEBP"
    static #OGGS = [0x4f, 0x67, 0x67, 0x53]; // "OggS"
    static #ID3 = [0x49, 0x44, 0x33]; // "ID3"
    static #FTYP = [0x66, 0x74, 0x79, 0x70]; // "ftyp"

    // Advertised limit; it is not enforced anywhere inside this class.
    maxAudioDurationSeconds = 60;
    supportedFormats = ["mp4"];

    /**
     * True when `buffer` holds exactly `signature` starting at `offset`.
     * Reads past the end of the buffer yield `undefined` and never match.
     */
    static #hasSignature(buffer, offset, signature) {
        return signature.every((byte, index) => buffer[offset + index] === byte);
    }

    /**
     * @returns {boolean} `true` when `getReplicateAuth()` yields credentials.
     */
    isConfigured() {
        return getReplicateAuth() !== null;
    }

    /**
     * Generate a lip-synced avatar video through a Replicate prediction.
     *
     * @param {object} options
     * @param {Buffer|string} options.image - Reference image (Buffer or HTTPS URL).
     * @param {Buffer|string} options.audio - Driving audio (Buffer or HTTPS URL).
     * @param {string} [options.model] - Replicate model reference; defaults to `DEFAULT_MODEL`.
     * @returns {Promise<object>} `{ buffer, format, size, provider, metadata }`
     *   where `metadata` carries `latency` (ms), `provider`, `model` and `jobId`.
     * @throws {AvatarError} PROVIDER_NOT_CONFIGURED when no Replicate auth is available,
     *   AUDIO_REQUIRED when `options.audio` is falsy, INVALID_INPUT when an input
     *   cannot be resolved, GENERATION_FAILED when the prediction or the output
     *   download fails.
     */
    async generate(options) {
        const auth = getReplicateAuth();
        if (!auth) {
            throw new AvatarError({
                code: AVATAR_ERROR_CODES.PROVIDER_NOT_CONFIGURED,
                message: "REPLICATE_API_TOKEN not configured",
                category: ErrorCategory.CONFIGURATION,
                severity: ErrorSeverity.HIGH,
                retriable: false,
            });
        }
        if (!options.audio) {
            throw new AvatarError({
                code: AVATAR_ERROR_CODES.AUDIO_REQUIRED,
                // resolveBuffer() rejects local paths, so only advertise what is accepted.
                message: "Replicate avatar handler (MuseTalk) requires `audio` (Buffer or HTTPS URL); text-only is not supported. Use D-ID for text-driven talks or chain TTS + Replicate.",
                category: ErrorCategory.VALIDATION,
                severity: ErrorSeverity.MEDIUM,
                retriable: false,
            });
        }
        const startTime = Date.now();
        const model = options.model ?? DEFAULT_MODEL;
        // The two inputs are independent, so resolve (and possibly download) them
        // concurrently instead of paying both latencies back to back.
        const [imageBuffer, audioBuffer] = await Promise.all([
            this.resolveBuffer(options.image, MAX_IMAGE_BYTES, "Replicate avatar reference image"),
            this.resolveBuffer(options.audio, MAX_AUDIO_BYTES, "Replicate avatar reference audio"),
        ]);
        // Inputs are handed to Replicate inline as base64 data URIs.
        const imageDataUri = `data:image/${this.detectImageType(imageBuffer)};base64,${imageBuffer.toString("base64")}`;
        const audioDataUri = `data:audio/${this.detectAudioType(audioBuffer)};base64,${audioBuffer.toString("base64")}`;
        let prediction;
        try {
            prediction = await predict(auth, {
                model,
                input: {
                    image: imageDataUri,
                    audio: audioDataUri,
                    bbox_shift: 0,
                    fps: 25,
                },
            });
        }
        catch (err) {
            throw new AvatarError({
                code: AVATAR_ERROR_CODES.GENERATION_FAILED,
                message: `Replicate avatar generation failed: ${err instanceof Error ? err.message : String(err)}`,
                category: ErrorCategory.EXECUTION,
                severity: ErrorSeverity.HIGH,
                retriable: true,
                context: { model },
                originalError: err instanceof Error ? err : undefined,
            });
        }
        let videoBuffer;
        try {
            videoBuffer = await downloadPredictionOutput(prediction, MAX_VIDEO_BYTES);
        }
        catch (err) {
            throw new AvatarError({
                code: AVATAR_ERROR_CODES.GENERATION_FAILED,
                message: `Replicate avatar download failed: ${err instanceof Error ? err.message : String(err)}`,
                category: ErrorCategory.NETWORK,
                severity: ErrorSeverity.MEDIUM,
                retriable: true,
                context: { predictionId: prediction.id },
                originalError: err instanceof Error ? err : undefined,
            });
        }
        const latency = Date.now() - startTime;
        logger.info(`[ReplicateAvatar] Generated ${videoBuffer.length} bytes in ${latency}ms — model ${model}`);
        return {
            buffer: videoBuffer,
            format: "mp4",
            size: videoBuffer.length,
            provider: "replicate",
            metadata: {
                latency,
                provider: "replicate",
                model,
                jobId: prediction.id,
            },
        };
    }

    /**
     * Turn a caller-supplied input into a size-checked Buffer.
     *
     * Buffers are returned as-is after a length check. HTTPS URLs (strings or
     * `URL` instances) are downloaded with a 60 s overall deadline that covers
     * redirects and the body read. Redirects are followed manually so that
     * every hop is HTTPS and passes `assertSafeUrl`; otherwise a public URL
     * could redirect the request to an internal address.
     *
     * @param {Buffer|string|URL} input - Buffer or HTTPS URL.
     * @param {number} [maxBytes=MAX_IMAGE_BYTES] - Upper bound on the payload size.
     * @param {string} [label] - Human-readable input name used in error messages.
     * @returns {Promise<Buffer>} The input bytes.
     * @throws {AvatarError} INVALID_INPUT for oversize, non-HTTPS, unsafe,
     *   timed-out or non-2xx inputs. Non-timeout errors thrown by `fetch`
     *   itself propagate unwrapped.
     */
    async resolveBuffer(input, maxBytes = MAX_IMAGE_BYTES, label = "Replicate avatar input") {
        if (Buffer.isBuffer(input)) {
            if (input.length > maxBytes) {
                throw new AvatarError({
                    code: AVATAR_ERROR_CODES.INVALID_INPUT,
                    message: `${label} too large: ${input.length} bytes exceeds ${maxBytes}`,
                    category: ErrorCategory.VALIDATION,
                    severity: ErrorSeverity.HIGH,
                    retriable: false,
                });
            }
            return input;
        }
        // URL instances used to work through implicit string coercion; keep accepting them.
        const url = input instanceof URL ? input.href : input;
        if (typeof url !== "string") {
            // Missing or wrongly-typed input: report what was actually received
            // rather than pretending it was the string "undefined".
            const received = url === null ? "null" : typeof url;
            throw new AvatarError({
                code: AVATAR_ERROR_CODES.INVALID_INPUT,
                message: `Invalid input for ${label}: expected Buffer or HTTPS URL, got ${received}. Local file reads are not supported.`,
                category: ErrorCategory.VALIDATION,
                severity: ErrorSeverity.HIGH,
                retriable: false,
            });
        }
        // Reject local file paths — only Buffer or HTTPS URLs are accepted.
        if (!/^https:\/\//.test(url)) {
            throw new AvatarError({
                code: AVATAR_ERROR_CODES.INVALID_INPUT,
                message: `Invalid input: expected Buffer or HTTPS URL, got string "${url}". Local file reads are not supported.`,
                category: ErrorCategory.VALIDATION,
                severity: ErrorSeverity.HIGH,
                retriable: false,
            });
        }
        const FETCH_TIMEOUT_MS = 60_000;
        const MAX_REDIRECTS = 5;
        const REDIRECT_STATUSES = [301, 302, 303, 307, 308];
        const describe = (err) => (err instanceof Error ? err.message : String(err));
        const asCause = (err) => (err instanceof Error ? err : undefined);
        const controller = new AbortController();
        const timeoutId = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);
        const timedOut = (err) => new AvatarError({
            code: AVATAR_ERROR_CODES.INVALID_INPUT,
            message: `Replicate avatar input fetch timed out after ${FETCH_TIMEOUT_MS / 1000}s: ${url}`,
            category: ErrorCategory.NETWORK,
            severity: ErrorSeverity.MEDIUM,
            retriable: true,
            originalError: asCause(err),
        });
        try {
            let target = url;
            let response;
            for (let hops = 0;; hops += 1) {
                // Validate every hop, not just the URL the caller handed in.
                try {
                    await assertSafeUrl(target);
                }
                catch (err) {
                    throw new AvatarError({
                        code: AVATAR_ERROR_CODES.INVALID_INPUT,
                        message: `Unsafe URL rejected: ${describe(err)}`,
                        category: ErrorCategory.VALIDATION,
                        severity: ErrorSeverity.HIGH,
                        retriable: false,
                        context: { url: target },
                        originalError: asCause(err),
                    });
                }
                try {
                    // "manual" stops fetch from following redirects to unvalidated hosts.
                    response = await fetch(target, { signal: controller.signal, redirect: "manual" });
                }
                catch (err) {
                    if (controller.signal.aborted || (err instanceof Error && err.name === "AbortError")) {
                        throw timedOut(err);
                    }
                    throw err;
                }
                const location = REDIRECT_STATUSES.includes(response.status)
                    ? response.headers.get("location")
                    : null;
                if (!location) {
                    break;
                }
                // Best-effort release of the redirect response's connection; a
                // failure here is irrelevant because the body is being discarded.
                await response.body?.cancel().catch(() => undefined);
                if (hops >= MAX_REDIRECTS) {
                    throw new AvatarError({
                        code: AVATAR_ERROR_CODES.INVALID_INPUT,
                        message: `Failed to fetch ${url}: too many redirects (max ${MAX_REDIRECTS})`,
                        category: ErrorCategory.NETWORK,
                        severity: ErrorSeverity.MEDIUM,
                        retriable: false,
                        context: { url },
                    });
                }
                let next = null;
                try {
                    next = new URL(location, target);
                }
                catch {
                    // Unparseable Location header — rejected just below.
                }
                if (next === null || next.protocol !== "https:") {
                    throw new AvatarError({
                        code: AVATAR_ERROR_CODES.INVALID_INPUT,
                        message: `Unsafe URL rejected: redirect to non-HTTPS location "${location}"`,
                        category: ErrorCategory.VALIDATION,
                        severity: ErrorSeverity.HIGH,
                        retriable: false,
                        context: { url: target },
                    });
                }
                target = next.href;
            }
            if (!response.ok) {
                throw new AvatarError({
                    code: AVATAR_ERROR_CODES.INVALID_INPUT,
                    message: `Failed to fetch ${url}: ${response.status}`,
                    category: ErrorCategory.NETWORK,
                    severity: ErrorSeverity.MEDIUM,
                    retriable: response.status >= 500,
                });
            }
            try {
                // The deadline is still armed here so a stalled body cannot hang forever.
                return await readBoundedBuffer(response, maxBytes, label);
            }
            catch (err) {
                if (controller.signal.aborted) {
                    throw timedOut(err);
                }
                throw new AvatarError({
                    code: AVATAR_ERROR_CODES.INVALID_INPUT,
                    message: `${label} too large: ${describe(err)}`,
                    category: ErrorCategory.VALIDATION,
                    severity: ErrorSeverity.HIGH,
                    retriable: false,
                    context: { url },
                    originalError: asCause(err),
                });
            }
        }
        finally {
            clearTimeout(timeoutId);
        }
    }

    /**
     * Detect audio MIME subtype from magic bytes.
     *
     * - WAV : "RIFF" header (52 49 46 46)
     * - OGG : "OggS" capture pattern (4F 67 67 53)
     * - MP3 : ID3 tag (49 44 33) or an MPEG frame sync (0xFF followed by a
     *         byte whose top three bits are set)
     * - M4A : "ftyp" box at offset 4 (common isom/M4A variant) → "mp4"
     *
     * Falls back to "mp3" when detection is inconclusive. Every MP3 path
     * returns the same subtype so one format never yields two MIME types.
     *
     * @param {Buffer} buffer - Audio bytes.
     * @returns {"wav"|"ogg"|"mp3"|"mp4"} Subtype for an `audio/*` data URI.
     */
    detectAudioType(buffer) {
        if (buffer.length < 4) {
            return "mp3";
        }
        if (ReplicateAvatar.#hasSignature(buffer, 0, ReplicateAvatar.#RIFF)) {
            return "wav";
        }
        if (ReplicateAvatar.#hasSignature(buffer, 0, ReplicateAvatar.#OGGS)) {
            return "ogg";
        }
        if (ReplicateAvatar.#hasSignature(buffer, 0, ReplicateAvatar.#ID3)) {
            return "mp3";
        }
        // MPEG frame sync. NOTE: ADTS AAC streams share this bit pattern and
        // are therefore reported as "mp3" too.
        if (buffer[0] === 0xff && (buffer[1] & 0xe0) === 0xe0) {
            return "mp3";
        }
        // M4A / AAC in an ISO-BMFF container: "ftyp" box at offset 4.
        if (ReplicateAvatar.#hasSignature(buffer, 4, ReplicateAvatar.#FTYP)) {
            return "mp4";
        }
        return "mp3";
    }

    /**
     * Detect image MIME subtype from magic bytes.
     *
     * - PNG  : 89 50 at offset 0
     * - WebP : "RIFF" at offset 0 and "WEBP" at offset 8
     * - JPEG : FF D8 at offset 0, and also the fallback for anything else
     *
     * A RIFF container that is not WebP (e.g. WAVE audio) falls back to
     * "jpeg" rather than being mislabelled as WebP.
     *
     * @param {Buffer} buffer - Image bytes.
     * @returns {"png"|"webp"|"jpeg"} Subtype for an `image/*` data URI.
     */
    detectImageType(buffer) {
        if (buffer.length < 4) {
            return "jpeg";
        }
        if (buffer[0] === 0x89 && buffer[1] === 0x50) {
            return "png";
        }
        if (ReplicateAvatar.#hasSignature(buffer, 0, ReplicateAvatar.#RIFF) &&
            ReplicateAvatar.#hasSignature(buffer, 8, ReplicateAvatar.#WEBP)) {
            return "webp";
        }
        // JPEG (FF D8) and every unrecognised payload share the same answer.
        return "jpeg";
    }
}
//# sourceMappingURL=ReplicateAvatar.js.map