UNPKG

@yuchida-tamu/podcast-gen

Version:

AI-Powered Monologue Podcast Generator

116 lines 4.84 kB
import { PodcastGenerationError } from '../utils/errors.js';
import { createMonologuePrompt, NARRATOR_PROMPT } from './prompts.js';

/**
 * Builds a single-narrator monologue as an ordered list of timed segments.
 *
 * Each segment has the shape `{ timestamp, text, emotion, duration }`, where
 * `timestamp` is the "MM:SS" start offset and `duration` is an estimated
 * speaking time in whole seconds.
 */
export class MonologueEngine {
    llmClient;
    narrator;
    previousContent;

    /**
     * @param llmClient client exposing `generateContent({ systemPrompt, userPrompt })`
     *   that resolves to an object with a `content` string.
     */
    constructor(llmClient) {
        this.llmClient = llmClient;
        this.narrator = {
            name: 'Narrator',
            personality: 'Engaging podcast host with balanced perspective',
            voice: 'conversational',
            systemPrompt: NARRATOR_PROMPT,
        };
        this.previousContent = [];
    }

    /**
     * Generates the full monologue in three phases: introduction (15% of the
     * target segment count, at least one), exploration (70%) and conclusion
     * (15%, at least one). Segments are generated sequentially because every
     * prompt is built from the segments produced so far.
     *
     * @param topic subject of the episode, forwarded to the prompt builder
     * @param duration total duration of the episode in minutes (default 5)
     * @returns the generated segments in playback order
     * @throws {PodcastGenerationError} when generation fails or `duration` is
     *   not a finite number; the underlying error is attached as `cause`.
     */
    async generateMonologue(topic, duration = 5) {
        try {
            this.previousContent = [];

            // Reject NaN/Infinity up front: NaN would silently yield an empty
            // monologue and Infinity would request segments without end.
            const minutes = Number(duration);
            if (!Number.isFinite(minutes)) {
                throw new RangeError(`duration must be a finite number of minutes, received ${String(duration)}`);
            }

            // Calculate target segments based on duration
            const targetSegments = this.calculateTargetSegments(minutes);
            const phasePlan = [
                ['introduction', Math.max(1, Math.floor(targetSegments * 0.15))],
                ['exploration', Math.floor(targetSegments * 0.7)],
                ['conclusion', Math.max(1, Math.floor(targetSegments * 0.15))],
            ];

            const segments = [];
            let currentTime = 0;
            for (const [phase, segmentCount] of phasePlan) {
                for (let i = 0; i < segmentCount; i++) {
                    const content = await this.generateContent(topic, phase);
                    const segment = this.createSegment(content, currentTime);
                    segments.push(segment);
                    currentTime += segment.duration;
                    // Recorded so the next prompt can see what was already said.
                    this.previousContent.push(segment);
                }
            }
            return segments;
        }
        catch (error) {
            const errorMessage = error instanceof Error ? error.message : 'Unknown error';
            const wrapped = new PodcastGenerationError(`Failed to generate monologue: ${errorMessage}`, 'monologue');
            // Keep the original error reachable for debugging instead of
            // discarding its stack; do not overwrite a cause already set.
            if (error instanceof Error && wrapped.cause === undefined) {
                wrapped.cause = error;
            }
            throw wrapped;
        }
    }

    /**
     * Requests one piece of narration from the LLM client.
     *
     * The system prompt is the narrator prompt's `personality` and
     * `formatInstructions` joined by a blank line; the user prompt is built
     * from the topic, the phase and the segments generated so far.
     *
     * @param topic subject of the episode
     * @param phase 'introduction', 'exploration' or 'conclusion'
     * @returns the `content` field of the client's response
     */
    async generateContent(topic, phase) {
        const { personality, formatInstructions } = this.narrator.systemPrompt;
        const systemPrompt = `${personality}\n\n${formatInstructions}`;
        const userPrompt = createMonologuePrompt(topic, phase, this.previousContent);
        const response = await this.llmClient.generateContent({ systemPrompt, userPrompt });
        return response.content;
    }

    /**
     * Converts raw generated text into a segment record.
     *
     * @param content raw text, possibly containing `[emotion]` markers
     * @param startTime start offset of the segment in seconds
     * @returns `{ timestamp, text, emotion, duration }`
     */
    createSegment(content, startTime) {
        const emotion = this.extractEmotion(content);
        const cleanText = this.cleanText(content);
        const duration = this.estimateDuration(cleanText);
        return {
            timestamp: this.formatTime(startTime),
            text: cleanText,
            emotion,
            duration,
        };
    }

    /**
     * Returns the contents of the first `[...]` marker in the text, or
     * 'neutral' when the text has no marker.
     */
    extractEmotion(text) {
        const emotionMatch = text.match(/\[([^\]]+)\]/);
        return emotionMatch ? emotionMatch[1] : 'neutral';
    }

    /**
     * Strips every `[...]` marker from the text and trims the result.
     */
    cleanText(text) {
        // Remove emotion indicators from the main text
        return text.replace(/\[([^\]]+)\]/g, '').trim();
    }

    /**
     * Computes how many segments to aim for, budgeting 30 seconds per segment.
     *
     * @param duration total duration of an audio episode (in minutes)
     * @returns target number of segments, rounded down
     */
    calculateTargetSegments(duration) {
        // Aim for segments of about 20-30 seconds each
        return Math.floor((duration * 60) / 30);
    }

    /**
     * Estimates the speaking time of a text in whole seconds.
     *
     * Uses ~150 words per minute at ~5 characters per word (12.5 characters
     * per second), applies a random ±20% variation, and never returns less
     * than 5. The result is therefore not deterministic.
     *
     * @param text narration text
     * @returns estimated duration in seconds (integer, minimum 5)
     */
    estimateDuration(text) {
        const wordsPerMinute = 150;
        const charactersPerWord = 5;
        const charactersPerSecond = (wordsPerMinute * charactersPerWord) / 60;
        const baseDuration = text.length / charactersPerSecond;
        // Add some natural variation (±20%)
        const variation = (Math.random() - 0.5) * 0.4;
        return Math.max(5, Math.floor(baseDuration * (1 + variation)));
    }

    /**
     * Formats a second count as a zero-padded "MM:SS" string.
     *
     * @param seconds offset in seconds; fractional values are rounded down so
     *   the seconds field is always a whole number
     * @returns the formatted timestamp
     */
    formatTime(seconds) {
        const totalSeconds = Math.floor(seconds);
        const minutes = Math.floor(totalSeconds / 60);
        const remainingSeconds = totalSeconds % 60;
        return `${String(minutes).padStart(2, '0')}:${String(remainingSeconds).padStart(2, '0')}`;
    }
}
//# sourceMappingURL=engine.js.map