/*
 * @clipwhisperer/common
 * Version:
 * ClipWhisperer Common - Shared library providing core utilities, database schemas, authentication, bucket management, and common functionality across all ClipWhisperer microservices
 * 140 lines (128 loc) • 5.33 kB
 * text/typescript
 */
import { z } from "zod";
// The kinds of speech mark this schema accepts.
const speechMarkTypeSchema = z.enum(["word", "sentence", "ssml"]);

/**
 * Speech mark schema: a single timing entry from AWS Polly.
 *
 * A mark carries a numeric time offset, the kind of mark, the character
 * span it covers in the synthesized text, and the marked text itself.
 */
export const speechMarkSchema = z.object({
  time: z
    .number()
    .describe("Start time in seconds"),
  type: speechMarkTypeSchema
    .describe("Type of speech mark"),
  start: z
    .number()
    .describe("Character start position in text"),
  end: z
    .number()
    .describe("Character end position in text"),
  value: z
    .string()
    .describe("The actual word/text"),
});

/** Static type of a value that has been parsed by `speechMarkSchema`. */
export type SpeechMark = z.infer<typeof speechMarkSchema>;
// Synthesis settings and file size reported alongside the generated audio.
const narratorAudioMetadataSchema = z.object({
  voiceId: z
    .string()
    .describe("AWS Polly voice used"),
  engine: z
    .string()
    .describe("AWS Polly engine used"),
  outputFormat: z
    .string()
    .describe("Audio output format"),
  fileSize: z
    .number()
    .describe("Audio file size in bytes"),
});

/**
 * Narrator service response schema.
 *
 * Describes the result of a narration: where the audio lives, the speech
 * marks that time it, its overall duration and word count, and the
 * synthesis metadata. Every field is required.
 */
export const narratorResponseSchema = z.object({
  audioId: z
    .string()
    .describe("Unique identifier for the audio file"),
  audioUrl: z
    .string()
    .describe("URL/path to the generated audio file"),
  speechMarks: z
    .array(speechMarkSchema)
    .describe("Array of speech marks with timing"),
  duration: z
    .number()
    .describe("Total audio duration in seconds"),
  wordCount: z
    .number()
    .describe("Number of words in the text"),
  metadata: narratorAudioMetadataSchema,
});

/** Static type of a value that has been parsed by `narratorResponseSchema`. */
export type NarratorResponse = z.infer<typeof narratorResponseSchema>;
// Identifiers the Hub attaches so a narration can be traced back to its request.
const narratorRequestTrackingSchema = z.object({
  videoId: z
    .string()
    .describe("Associated video ID"),
  requestId: z
    .string()
    .describe("Hub request ID for tracking"),
});

/**
 * Hub -> Narrator request schema.
 *
 * `text` must be a non-empty string. `voiceId`, `engine` and `outputFormat`
 * may be omitted, in which case they are filled in with "Matthew",
 * "generative" and "mp3" respectively. `metadata` is required.
 */
export const hubToNarratorRequestSchema = z.object({
  text: z
    .string()
    .min(1)
    .describe("Text to synthesize"),
  voiceId: z
    .string()
    .optional()
    .default("Matthew")
    .describe("AWS Polly voice ID"),
  engine: z
    .string()
    .optional()
    .default("generative")
    .describe("AWS Polly engine"),
  outputFormat: z
    .string()
    .optional()
    .default("mp3")
    .describe("Audio output format"),
  metadata: narratorRequestTrackingSchema,
});

/**
 * Static type of a parsed Hub -> Narrator request. This is the schema's
 * output shape, i.e. the shape after defaults have been applied.
 */
export type HubToNarratorRequest = z.infer<typeof hubToNarratorRequestSchema>;
// Caption styling and grouping options; every field has a default.
const rendererCaptionConfigSchema = z.object({
  fontSize: z
    .number()
    .optional()
    .default(48),
  fontColor: z
    .string()
    .optional()
    .default("white"),
  fontFamily: z
    .string()
    .optional()
    .default("Arial"),
  backgroundColor: z
    .string()
    .optional()
    .default("black@0.7"),
  wordsPerGroup: z
    .number()
    .optional()
    .default(2)
    .describe("Words to display simultaneously"),
  minimumDisplayTime: z
    .number()
    .optional()
    .default(1.0)
    .describe("Minimum seconds each text group is shown"),
});

// Identifiers linking a render job to the Hub request and the narration it uses.
const rendererRequestTrackingSchema = z.object({
  requestId: z
    .string()
    .describe("Hub request ID for tracking"),
  narratorAudioId: z
    .string()
    .describe("Audio ID from Narrator service"),
});

/**
 * Hub -> Renderer request schema.
 *
 * Requires the video id, the narration audio URL, its speech marks and the
 * tracking metadata. `backgroundVideoPath` and `config` are optional.
 */
export const hubToRendererRequestSchema = z.object({
  videoId: z
    .string()
    .describe("Unique video identifier"),
  audioUrl: z
    .string()
    .describe("URL to the audio file from Narrator"),
  speechMarks: z
    .array(speechMarkSchema)
    .describe("Precise word timings from Narrator"),
  backgroundVideoPath: z
    .string()
    .optional()
    .describe("Path to background video"),
  // The config object as a whole is optional (not defaulted): when it is
  // omitted the parsed value is undefined, and the per-field defaults above
  // only take effect once a config object is actually supplied.
  config: rendererCaptionConfigSchema.optional(),
  metadata: rendererRequestTrackingSchema,
});

/** Static type of a value that has been parsed by `hubToRendererRequestSchema`. */
export type HubToRendererRequest = z.infer<typeof hubToRendererRequestSchema>;
// One word together with the start and end time it was given in the video.
const renderedWordTimingSchema = z.object({
  word: z.string(),
  startTime: z.number(),
  endTime: z.number(),
});

// Bookkeeping about the render run itself.
const renderRunMetadataSchema = z.object({
  renderTime: z
    .number()
    .describe("Time taken to render in seconds"),
  audioSource: z
    .string()
    .describe("Source audio URL"),
  speechMarksCount: z
    .number()
    .describe("Number of speech marks processed"),
});

/**
 * Renderer response schema.
 *
 * Describes a finished render: the video id, where the output was written,
 * its duration and size, the word timings that were used, and metadata
 * about the render run. Every field is required.
 */
export const rendererResponseSchema = z.object({
  videoId: z
    .string()
    .describe("Video identifier"),
  outputPath: z
    .string()
    .describe("Path to the rendered video"),
  duration: z
    .number()
    .describe("Video duration in seconds"),
  fileSize: z
    .number()
    .describe("Video file size in bytes"),
  wordTimings: z
    .array(renderedWordTimingSchema)
    .describe("Final word timings used in the video"),
  metadata: renderRunMetadataSchema,
});

/** Static type of a value that has been parsed by `rendererResponseSchema`. */
export type RendererResponse = z.infer<typeof rendererResponseSchema>;
// Overall pipeline stage of a video, in the order the stages are listed.
const workflowStageSchema = z.enum([
  "pending",
  "scraping",
  "scrape_complete",
  "narrating",
  "narration_complete",
  "rendering",
  "render_complete",
  "completed",
  "failed",
]);

// Lifecycle state shared by every individual step.
const workflowStepStateSchema = z.enum(["pending", "in_progress", "completed", "failed"]);

// Fields common to all steps. Each step extends this with its own `data`
// payload, so the status values and timestamps are declared exactly once.
const workflowStepBaseSchema = z.object({
  status: workflowStepStateSchema,
  startTime: z.date().optional(),
  endTime: z.date().optional(),
});

/**
 * Complete workflow status schema.
 *
 * Tracks one video through the pipeline: its overall stage, a progress
 * percentage constrained to 0-100, a free-form label for the current step,
 * and a per-step record for scraping, narration and rendering. Each step
 * record holds a state, optional start/end timestamps and an optional
 * `data` payload (untyped for scraping, the narrator/renderer response
 * schemas for the other two).
 *
 * NOTE(review): all timestamps use `z.date()`, which validates `Date`
 * instances. If a status object is ever validated after a JSON round-trip,
 * the timestamps would arrive as strings and need reviving first — confirm
 * against callers.
 */
export const workflowStatusSchema = z.object({
  videoId: z.string(),
  status: workflowStageSchema,
  progress: z
    .number()
    .min(0)
    .max(100)
    .describe("Progress percentage"),
  currentStep: z
    .string()
    .describe("Current processing step"),
  steps: z.object({
    scraping: workflowStepBaseSchema.extend({
      data: z.any().optional(),
    }),
    narration: workflowStepBaseSchema.extend({
      data: narratorResponseSchema.optional(),
    }),
    rendering: workflowStepBaseSchema.extend({
      data: rendererResponseSchema.optional(),
    }),
  }),
  error: z.string().optional(),
  createdAt: z.date(),
  updatedAt: z.date(),
});

/** Static type of a value that has been parsed by `workflowStatusSchema`. */
export type WorkflowStatus = z.infer<typeof workflowStatusSchema>;