UNPKG

@clipwhisperer/common

Version:

ClipWhisperer Common - Shared library providing core utilities, database schemas, authentication, bucket management, and common functionality across all ClipWhisperer microservices

140 lines (128 loc) 5.33 kB
import { z } from "zod";

// ---------------------------------------------------------------------------
// Speech marks
// ---------------------------------------------------------------------------

/**
 * A single speech mark: one timed unit of synthesized text.
 * The field descriptions refer to AWS Polly as the producer of these marks.
 */
export const speechMarkSchema = z.object({
  time: z.number().describe("Start time in seconds"),
  type: z.enum(["word", "sentence", "ssml"]).describe("Type of speech mark"),
  start: z.number().describe("Character start position in text"),
  end: z.number().describe("Character end position in text"),
  value: z.string().describe("The actual word/text"),
});

export type SpeechMark = z.infer<typeof speechMarkSchema>;

// ---------------------------------------------------------------------------
// Narrator service
// ---------------------------------------------------------------------------

/** Synthesis details reported alongside a generated audio file. */
const narratorAudioMetadataSchema = z.object({
  voiceId: z.string().describe("AWS Polly voice used"),
  engine: z.string().describe("AWS Polly engine used"),
  outputFormat: z.string().describe("Audio output format"),
  fileSize: z.number().describe("Audio file size in bytes"),
});

/**
 * Payload returned by the Narrator service: the generated audio, its speech
 * marks, and summary figures about the synthesis.
 */
export const narratorResponseSchema = z.object({
  audioId: z.string().describe("Unique identifier for the audio file"),
  audioUrl: z.string().describe("URL/path to the generated audio file"),
  speechMarks: z
    .array(speechMarkSchema)
    .describe("Array of speech marks with timing"),
  duration: z.number().describe("Total audio duration in seconds"),
  wordCount: z.number().describe("Number of words in the text"),
  metadata: narratorAudioMetadataSchema,
});

export type NarratorResponse = z.infer<typeof narratorResponseSchema>;

/** Tracking identifiers the Hub attaches to a narration request. */
const narratorRequestMetadataSchema = z.object({
  videoId: z.string().describe("Associated video ID"),
  requestId: z.string().describe("Hub request ID for tracking"),
});

/**
 * Request the Hub sends to the Narrator service.
 * `text` must be non-empty; `voiceId`, `engine` and `outputFormat` fall back
 * to their defaults when omitted.
 */
export const hubToNarratorRequestSchema = z.object({
  text: z.string().min(1).describe("Text to synthesize"),
  voiceId: z
    .string()
    .optional()
    .default("Matthew")
    .describe("AWS Polly voice ID"),
  engine: z
    .string()
    .optional()
    .default("generative")
    .describe("AWS Polly engine"),
  outputFormat: z
    .string()
    .optional()
    .default("mp3")
    .describe("Audio output format"),
  metadata: narratorRequestMetadataSchema,
});

export type HubToNarratorRequest = z.infer<typeof hubToNarratorRequestSchema>;

// ---------------------------------------------------------------------------
// Renderer service
// ---------------------------------------------------------------------------

/**
 * Caption styling and pacing options. Every field has a default, so an empty
 * object parses to a fully populated configuration.
 */
const rendererConfigSchema = z.object({
  fontSize: z.number().optional().default(48),
  fontColor: z.string().optional().default("white"),
  fontFamily: z.string().optional().default("Arial"),
  backgroundColor: z.string().optional().default("black@0.7"),
  wordsPerGroup: z
    .number()
    .optional()
    .default(2)
    .describe("Words to display simultaneously"),
  minimumDisplayTime: z
    .number()
    .optional()
    .default(1.0)
    .describe("Minimum seconds each text group is shown"),
});

/** Tracking identifiers the Hub attaches to a render request. */
const rendererRequestMetadataSchema = z.object({
  requestId: z.string().describe("Hub request ID for tracking"),
  narratorAudioId: z.string().describe("Audio ID from Narrator service"),
});

/**
 * Request the Hub sends to the Renderer service.
 */
export const hubToRendererRequestSchema = z.object({
  videoId: z.string().describe("Unique video identifier"),
  audioUrl: z.string().describe("URL to the audio file from Narrator"),
  speechMarks: z
    .array(speechMarkSchema)
    .describe("Precise word timings from Narrator"),
  backgroundVideoPath: z
    .string()
    .optional()
    .describe("Path to background video"),
  // The whole object is optional and has no default of its own: an omitted
  // `config` parses to `undefined`, so the per-field defaults only take
  // effect when a config object is actually supplied.
  config: rendererConfigSchema.optional(),
  metadata: rendererRequestMetadataSchema,
});

export type HubToRendererRequest = z.infer<typeof hubToRendererRequestSchema>;

/** Timing window of one word as used in the rendered video. */
const renderedWordTimingSchema = z.object({
  word: z.string(),
  startTime: z.number(),
  endTime: z.number(),
});

/** Figures describing how a render was produced. */
const rendererResultMetadataSchema = z.object({
  renderTime: z.number().describe("Time taken to render in seconds"),
  audioSource: z.string().describe("Source audio URL"),
  speechMarksCount: z.number().describe("Number of speech marks processed"),
});

/**
 * Payload returned by the Renderer service once a video has been produced.
 */
export const rendererResponseSchema = z.object({
  videoId: z.string().describe("Video identifier"),
  outputPath: z.string().describe("Path to the rendered video"),
  duration: z.number().describe("Video duration in seconds"),
  fileSize: z.number().describe("Video file size in bytes"),
  wordTimings: z
    .array(renderedWordTimingSchema)
    .describe("Final word timings used in the video"),
  metadata: rendererResultMetadataSchema,
});

export type RendererResponse = z.infer<typeof rendererResponseSchema>;

// ---------------------------------------------------------------------------
// Workflow status
// ---------------------------------------------------------------------------

/**
 * Fields common to every workflow step. Spread first into each step object so
 * the key order stays `status`, `startTime`, `endTime`, `data`.
 * `z.date()` accepts `Date` instances only, not date strings.
 */
const workflowStepBaseFields = {
  status: z.enum(["pending", "in_progress", "completed", "failed"]),
  startTime: z.date().optional(),
  endTime: z.date().optional(),
};

/** Scraping step; its result payload is not constrained. */
const scrapingStepSchema = z.object({
  ...workflowStepBaseFields,
  data: z.any().optional(),
});

/** Narration step; carries the Narrator response when one is present. */
const narrationStepSchema = z.object({
  ...workflowStepBaseFields,
  data: narratorResponseSchema.optional(),
});

/** Rendering step; carries the Renderer response when one is present. */
const renderingStepSchema = z.object({
  ...workflowStepBaseFields,
  data: rendererResponseSchema.optional(),
});

/**
 * End-to-end status of one video: overall state and progress, per-step
 * detail, an optional error message, and creation/update timestamps.
 */
export const workflowStatusSchema = z.object({
  videoId: z.string(),
  status: z.enum([
    "pending",
    "scraping",
    "scrape_complete",
    "narrating",
    "narration_complete",
    "rendering",
    "render_complete",
    "completed",
    "failed",
  ]),
  progress: z.number().min(0).max(100).describe("Progress percentage"),
  currentStep: z.string().describe("Current processing step"),
  steps: z.object({
    scraping: scrapingStepSchema,
    narration: narrationStepSchema,
    rendering: renderingStepSchema,
  }),
  error: z.string().optional(),
  createdAt: z.date(),
  updatedAt: z.date(),
});

export type WorkflowStatus = z.infer<typeof workflowStatusSchema>;